1 // Copyright 2017 The ChromiumOS Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 use std::collections::BTreeMap; 6 use std::fs::OpenOptions; 7 use std::os::unix::prelude::OpenOptionsExt; 8 9 use anyhow::anyhow; 10 use anyhow::Context; 11 use base::error; 12 use base::open_file_or_duplicate; 13 use base::warn; 14 use base::AsRawDescriptor; 15 use base::Event; 16 use base::RawDescriptor; 17 use base::WorkerThread; 18 use data_model::Le64; 19 use serde::Deserialize; 20 use serde::Serialize; 21 use vhost::Vhost; 22 use vhost::Vsock as VhostVsockHandle; 23 use vm_memory::GuestMemory; 24 use zerocopy::AsBytes; 25 26 use super::worker::VringBase; 27 use super::worker::Worker; 28 use super::Error; 29 use super::Result; 30 use crate::virtio::copy_config; 31 use crate::virtio::device_constants::vsock::NUM_QUEUES; 32 use crate::virtio::vsock::VsockConfig; 33 use crate::virtio::DeviceType; 34 use crate::virtio::Interrupt; 35 use crate::virtio::Queue; 36 use crate::virtio::VirtioDevice; 37 38 const QUEUE_SIZE: u16 = 256; 39 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES]; 40 41 pub struct Vsock { 42 worker_thread: Option<WorkerThread<Worker<VhostVsockHandle>>>, 43 vhost_handle: Option<VhostVsockHandle>, 44 cid: u64, 45 interrupts: Option<Vec<Event>>, 46 avail_features: u64, 47 acked_features: u64, 48 // vrings_base states: 49 // None - device was just created or is running. 50 // Some - device was put to sleep after running or was restored. 51 vrings_base: Option<Vec<VringBase>>, 52 // Some iff the device is active and awake. 53 event_queue: Option<Queue>, 54 // If true, we should send a TRANSPORT_RESET event to the guest at the next opportunity. 55 needs_transport_reset: bool, 56 } 57 58 #[derive(Serialize, Deserialize)] 59 struct VsockSnapshot { 60 cid: u64, 61 avail_features: u64, 62 acked_features: u64, 63 vrings_base: Vec<VringBase>, 64 } 65 66 impl Vsock { 67 /// Create a new virtio-vsock device with the given VM cid. new(base_features: u64, vsock_config: &VsockConfig) -> anyhow::Result<Vsock>68 pub fn new(base_features: u64, vsock_config: &VsockConfig) -> anyhow::Result<Vsock> { 69 let device_file = open_file_or_duplicate( 70 &vsock_config.vhost_device, 71 OpenOptions::new() 72 .read(true) 73 .write(true) 74 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK), 75 ) 76 .with_context(|| { 77 format!( 78 "failed to open virtual socket device {}", 79 vsock_config.vhost_device.display(), 80 ) 81 })?; 82 83 let handle = VhostVsockHandle::new(device_file); 84 85 let avail_features = base_features; 86 87 let mut interrupts = Vec::new(); 88 for _ in 0..NUM_QUEUES { 89 interrupts.push(Event::new().map_err(Error::VhostIrqCreate)?); 90 } 91 92 Ok(Vsock { 93 worker_thread: None, 94 vhost_handle: Some(handle), 95 cid: vsock_config.cid, 96 interrupts: Some(interrupts), 97 avail_features, 98 acked_features: 0, 99 vrings_base: None, 100 event_queue: None, 101 needs_transport_reset: false, 102 }) 103 } 104 new_for_testing(cid: u64, features: u64) -> Vsock105 pub fn new_for_testing(cid: u64, features: u64) -> Vsock { 106 Vsock { 107 worker_thread: None, 108 vhost_handle: None, 109 cid, 110 interrupts: None, 111 avail_features: features, 112 acked_features: 0, 113 vrings_base: None, 114 event_queue: None, 115 needs_transport_reset: false, 116 } 117 } 118 acked_features(&self) -> u64119 pub fn acked_features(&self) -> u64 { 120 self.acked_features 121 } 122 } 123 124 impl VirtioDevice for Vsock { keep_rds(&self) -> Vec<RawDescriptor>125 fn keep_rds(&self) -> Vec<RawDescriptor> { 126 let mut keep_rds = Vec::new(); 127 128 if let Some(handle) = &self.vhost_handle { 129 keep_rds.push(handle.as_raw_descriptor()); 130 } 131 132 if let Some(interrupt) = &self.interrupts { 133 for vhost_int in interrupt.iter() { 134 keep_rds.push(vhost_int.as_raw_descriptor()); 135 } 136 } 137 138 keep_rds 139 } 140 device_type(&self) -> DeviceType141 fn device_type(&self) -> DeviceType { 142 DeviceType::Vsock 143 } 144 queue_max_sizes(&self) -> &[u16]145 fn queue_max_sizes(&self) -> &[u16] { 146 QUEUE_SIZES 147 } 148 features(&self) -> u64149 fn features(&self) -> u64 { 150 self.avail_features 151 } 152 read_config(&self, offset: u64, data: &mut [u8])153 fn read_config(&self, offset: u64, data: &mut [u8]) { 154 let cid = Le64::from(self.cid); 155 copy_config(data, 0, cid.as_bytes(), offset); 156 } 157 ack_features(&mut self, value: u64)158 fn ack_features(&mut self, value: u64) { 159 let mut v = value; 160 161 // Check if the guest is ACK'ing a feature that we didn't claim to have. 162 let unrequested_features = v & !self.avail_features; 163 if unrequested_features != 0 { 164 warn!("vsock: virtio-vsock got unknown feature ack: {:x}", v); 165 166 // Don't count these features as acked. 167 v &= !unrequested_features; 168 } 169 self.acked_features |= v; 170 } 171 activate( &mut self, mem: GuestMemory, interrupt: Interrupt, mut queues: BTreeMap<usize, Queue>, ) -> anyhow::Result<()>172 fn activate( 173 &mut self, 174 mem: GuestMemory, 175 interrupt: Interrupt, 176 mut queues: BTreeMap<usize, Queue>, 177 ) -> anyhow::Result<()> { 178 if queues.len() != NUM_QUEUES { 179 return Err(anyhow!( 180 "net: expected {} queues, got {}", 181 NUM_QUEUES, 182 queues.len() 183 )); 184 } 185 186 let vhost_handle = self.vhost_handle.take().context("missing vhost_handle")?; 187 let interrupts = self.interrupts.take().context("missing interrupts")?; 188 let acked_features = self.acked_features; 189 let cid = self.cid; 190 191 // The third vq is an event-only vq that is not handled by the vhost 192 // subsystem (but still needs to exist). Split it off here. 193 let mut event_queue = queues.remove(&2).unwrap(); 194 // Send TRANSPORT_RESET event if needed. 195 if self.needs_transport_reset { 196 self.needs_transport_reset = false; 197 198 // We assume the event queue is non-empty. This should be OK for existing use cases 199 // because we expect the guest vsock driver to be initialized at the time of snapshot 200 // and this is only the event we ever write to the queue. 201 // 202 // If that assumption becomes invalid, we could integrate this logic into the worker 203 // thread's event loop so that it can wait for space in the queue. 204 let mut avail_desc = event_queue 205 .pop() 206 .expect("event queue is empty, can't send transport reset event"); 207 let transport_reset = virtio_sys::virtio_vsock::virtio_vsock_event{ 208 id: virtio_sys::virtio_vsock::virtio_vsock_event_id_VIRTIO_VSOCK_EVENT_TRANSPORT_RESET.into(), 209 }; 210 avail_desc 211 .writer 212 .write_obj(transport_reset) 213 .expect("failed to write transport reset event"); 214 let len = avail_desc.writer.bytes_written() as u32; 215 event_queue.add_used(avail_desc, len); 216 event_queue.trigger_interrupt(&interrupt); 217 } 218 self.event_queue = Some(event_queue); 219 220 let mut worker = Worker::new( 221 queues, 222 vhost_handle, 223 interrupts, 224 interrupt, 225 acked_features, 226 None, 227 ); 228 let activate_vqs = |handle: &VhostVsockHandle| -> Result<()> { 229 handle.set_cid(cid).map_err(Error::VhostVsockSetCid)?; 230 handle.start().map_err(Error::VhostVsockStart)?; 231 Ok(()) 232 }; 233 worker 234 .init(mem, QUEUE_SIZES, activate_vqs, self.vrings_base.take()) 235 .context("vsock worker init exited with error")?; 236 237 self.worker_thread = Some(WorkerThread::start("vhost_vsock", move |kill_evt| { 238 let cleanup_vqs = |_handle: &VhostVsockHandle| -> Result<()> { Ok(()) }; 239 let result = worker.run(cleanup_vqs, kill_evt); 240 if let Err(e) = result { 241 error!("vsock worker thread exited with error: {:?}", e); 242 } 243 worker 244 })); 245 246 Ok(()) 247 } 248 on_device_sandboxed(&mut self)249 fn on_device_sandboxed(&mut self) { 250 // ignore the error but to log the error. We don't need to do 251 // anything here because when activate, the other vhost set up 252 // will be failed to stop the activate thread. 253 if let Some(vhost_handle) = &self.vhost_handle { 254 match vhost_handle.set_owner() { 255 Ok(_) => {} 256 Err(e) => error!("{}: failed to set owner: {:?}", self.debug_label(), e), 257 } 258 } 259 } 260 virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>>261 fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> { 262 if let Some(worker_thread) = self.worker_thread.take() { 263 let worker = worker_thread.stop(); 264 self.interrupts = Some(worker.vhost_interrupt); 265 worker 266 .vhost_handle 267 .stop() 268 .context("failed to stop vrings")?; 269 let mut queues: BTreeMap<usize, Queue> = worker.queues; 270 let mut vrings_base = Vec::new(); 271 for (pos, _) in queues.iter() { 272 let vring_base = VringBase { 273 index: *pos, 274 base: worker.vhost_handle.get_vring_base(*pos)?, 275 }; 276 vrings_base.push(vring_base); 277 } 278 self.vrings_base = Some(vrings_base); 279 self.vhost_handle = Some(worker.vhost_handle); 280 queues.insert( 281 2, 282 self.event_queue.take().expect("Vsock event queue missing"), 283 ); 284 return Ok(Some(BTreeMap::from_iter(queues))); 285 } 286 Ok(None) 287 } 288 virtio_wake( &mut self, device_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>, ) -> anyhow::Result<()>289 fn virtio_wake( 290 &mut self, 291 device_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>, 292 ) -> anyhow::Result<()> { 293 match device_state { 294 None => Ok(()), 295 Some((mem, interrupt, queues)) => { 296 // TODO: activate is just what we want at the moment, but we should probably move 297 // it into a "start workers" function to make it obvious that it isn't strictly 298 // used for activate events. 299 self.activate(mem, interrupt, queues)?; 300 Ok(()) 301 } 302 } 303 } 304 virtio_snapshot(&mut self) -> anyhow::Result<serde_json::Value>305 fn virtio_snapshot(&mut self) -> anyhow::Result<serde_json::Value> { 306 let vrings_base = self.vrings_base.clone().unwrap_or_default(); 307 serde_json::to_value(VsockSnapshot { 308 // `cid` and `avail_features` are snapshot as a safeguard. Upon restore, validate 309 // cid and avail_features in the current vsock match the previously snapshot vsock. 310 cid: self.cid, 311 avail_features: self.avail_features, 312 acked_features: self.acked_features, 313 vrings_base, 314 }) 315 .context("failed to snapshot virtio console") 316 } 317 virtio_restore(&mut self, data: serde_json::Value) -> anyhow::Result<()>318 fn virtio_restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> { 319 let deser: VsockSnapshot = 320 serde_json::from_value(data).context("failed to deserialize virtio vsock")?; 321 anyhow::ensure!( 322 self.cid == deser.cid, 323 "Virtio vsock incorrect cid for restore:\n Expected: {}, Actual: {}", 324 self.cid, 325 deser.cid, 326 ); 327 anyhow::ensure!( 328 self.avail_features == deser.avail_features, 329 "Virtio vsock incorrect avail features for restore:\n Expected: {}, Actual: {}", 330 self.avail_features, 331 deser.avail_features, 332 ); 333 self.acked_features = deser.acked_features; 334 self.vrings_base = Some(deser.vrings_base); 335 // Send the TRANSPORT_RESET on next wake so that the guest knows that its existing vsock 336 // connections are broken. 337 self.needs_transport_reset = true; 338 Ok(()) 339 } 340 } 341 342 #[cfg(test)] 343 mod tests { 344 use std::convert::TryInto; 345 346 use super::*; 347 348 #[test] ack_features()349 fn ack_features() { 350 let cid = 5; 351 let features: u64 = (1 << 20) | (1 << 49) | (1 << 2) | (1 << 19); 352 let mut acked_features: u64 = 0; 353 let mut unavailable_features: u64 = 0; 354 355 let mut vsock = Vsock::new_for_testing(cid, features); 356 assert_eq!(acked_features, vsock.acked_features()); 357 358 acked_features |= 1 << 2; 359 vsock.ack_features(acked_features); 360 assert_eq!(acked_features, vsock.acked_features()); 361 362 acked_features |= 1 << 49; 363 vsock.ack_features(acked_features); 364 assert_eq!(acked_features, vsock.acked_features()); 365 366 acked_features |= 1 << 60; 367 unavailable_features |= 1 << 60; 368 vsock.ack_features(acked_features); 369 assert_eq!( 370 acked_features & !unavailable_features, 371 vsock.acked_features() 372 ); 373 374 acked_features |= 1 << 1; 375 unavailable_features |= 1 << 1; 376 vsock.ack_features(acked_features); 377 assert_eq!( 378 acked_features & !unavailable_features, 379 vsock.acked_features() 380 ); 381 } 382 383 #[test] read_config()384 fn read_config() { 385 let cid = 0xfca9a559fdcb9756; 386 let vsock = Vsock::new_for_testing(cid, 0); 387 388 let mut buf = [0u8; 8]; 389 vsock.read_config(0, &mut buf); 390 assert_eq!(cid, u64::from_le_bytes(buf)); 391 392 vsock.read_config(0, &mut buf[..4]); 393 assert_eq!( 394 (cid & 0xffffffff) as u32, 395 u32::from_le_bytes(buf[..4].try_into().unwrap()) 396 ); 397 398 vsock.read_config(4, &mut buf[..4]); 399 assert_eq!( 400 (cid >> 32) as u32, 401 u32::from_le_bytes(buf[..4].try_into().unwrap()) 402 ); 403 404 let data: [u8; 8] = [8, 226, 5, 46, 159, 59, 89, 77]; 405 buf.copy_from_slice(&data); 406 407 vsock.read_config(12, &mut buf); 408 assert_eq!(&buf, &data); 409 } 410 411 #[test] features()412 fn features() { 413 let cid = 5; 414 let features: u64 = 0xfc195ae8db88cff9; 415 416 let vsock = Vsock::new_for_testing(cid, features); 417 assert_eq!(features, vsock.features()); 418 } 419 } 420