1 // Copyright 2021 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::os::unix::fs::OpenOptionsExt;
6 use std::{
7 convert::{self, TryFrom, TryInto},
8 fs::{File, OpenOptions},
9 mem::size_of,
10 num::Wrapping,
11 os::unix::net::UnixListener,
12 path::Path,
13 str,
14 sync::{Arc, Mutex as StdMutex},
15 };
16
17 use anyhow::{bail, Context};
18 use argh::FromArgs;
19 use base::{
20 clear_fd_flags, error, info, AsRawDescriptor, Event, FromRawDescriptor, IntoRawDescriptor,
21 SafeDescriptor, UnlinkUnixListener,
22 };
23 use cros_async::{AsyncWrapper, EventAsync, Executor};
24 use data_model::{DataInit, Le64};
25 use hypervisor::ProtectionType;
26 use sync::Mutex;
27 use vhost::{self, Vhost, Vsock};
28 use vm_memory::GuestMemory;
29 use vmm_vhost::{
30 connection::vfio::{Endpoint as VfioEndpoint, Listener as VfioListener},
31 message::{
32 VhostUserConfigFlags, VhostUserInflight, VhostUserMemoryRegion, VhostUserProtocolFeatures,
33 VhostUserSingleMemoryRegion, VhostUserVirtioFeatures, VhostUserVringAddrFlags,
34 VhostUserVringState,
35 },
36 Error, Result, SlaveReqHandler, VhostUserSlaveReqHandlerMut,
37 };
38 use vmm_vhost::{Protocol, SlaveListener};
39
40 use crate::{
41 vfio::VfioRegionAddr,
42 virtio::{
43 base_features,
44 vhost::{
45 user::device::{
46 handler::{
47 create_guest_memory, create_vvu_guest_memory, vmm_va_to_gpa, HandlerType,
48 MappingInfo,
49 },
50 vvu::{doorbell::DoorbellRegion, pci::VvuPciDevice, VvuDevice},
51 },
52 vsock,
53 },
54 Queue, SignalableInterrupt,
55 },
56 };
57
// Largest vring size accepted for any queue; also the size each `Queue` is created with.
const MAX_VRING_LEN: u16 = vsock::QUEUE_SIZE;
// Number of virtqueues exposed by this backend, one per entry of `vsock::QUEUE_SIZES`.
const NUM_QUEUES: usize = vsock::QUEUE_SIZES.len();
// Index of the last queue. It is the event-only queue: its state is tracked locally and
// requests for it are never forwarded to the `Vsock` handle.
const EVENT_QUEUE: usize = NUM_QUEUES - 1;
61
/// Request-handler state for a vsock device that forwards vhost-user (or virtio-vhost-user)
/// requests to a `vhost::Vsock` handle.
struct VsockBackend {
    // Executor used to spawn the call-event forwarding tasks in vvu mode.
    ex: Executor,
    // Handle wrapping the file opened from the `vhost_socket` path.
    handle: Vsock,
    // Context id; applied to `handle` with `set_cid` right before the device is started and
    // returned to the frontend by `get_config`.
    cid: u64,
    // Feature bits reported by `handle.get_features()` when the backend was created.
    features: u64,
    // Selects regular vhost-user or virtio-vhost-user (vvu) behavior.
    handler_type: HandlerType,
    // Protocol features advertised to the frontend (`MQ | CONFIG`).
    protocol_features: VhostUserProtocolFeatures,
    // Guest memory; `None` until `set_mem_table` succeeds.
    mem: Option<GuestMemory>,
    // Mappings consumed by `vmm_va_to_gpa` when translating ring addresses; `None` until
    // `set_mem_table` succeeds.
    vmm_maps: Option<Vec<MappingInfo>>,
    // Locally tracked per-queue state (size, ring addresses, indices, ready flag).
    queues: [Queue; NUM_QUEUES],
    // Only used for vvu device mode.
    call_evts: [Option<Arc<Mutex<DoorbellRegion>>>; NUM_QUEUES],
}
75
76 impl VsockBackend {
new<P: AsRef<Path>>( ex: &Executor, cid: u64, vhost_socket: P, handler_type: HandlerType, ) -> anyhow::Result<VsockBackend>77 fn new<P: AsRef<Path>>(
78 ex: &Executor,
79 cid: u64,
80 vhost_socket: P,
81 handler_type: HandlerType,
82 ) -> anyhow::Result<VsockBackend> {
83 let handle = Vsock::new(
84 OpenOptions::new()
85 .read(true)
86 .write(true)
87 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
88 .open(vhost_socket)
89 .context("failed to open `Vsock` socket")?,
90 );
91
92 let features = handle.get_features().context("failed to get features")?;
93 let protocol_features = VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::CONFIG;
94 Ok(VsockBackend {
95 ex: ex.clone(),
96 handle,
97 cid,
98 features,
99 handler_type,
100 protocol_features,
101 mem: None,
102 vmm_maps: None,
103 queues: [
104 Queue::new(MAX_VRING_LEN),
105 Queue::new(MAX_VRING_LEN),
106 Queue::new(MAX_VRING_LEN),
107 ],
108 call_evts: Default::default(),
109 })
110 }
111 }
112
convert_vhost_error(err: vhost::Error) -> Error113 fn convert_vhost_error(err: vhost::Error) -> Error {
114 use vhost::Error::*;
115 match err {
116 IoctlError(e) => Error::ReqHandlerError(e),
117 _ => Error::SlaveInternalError,
118 }
119 }
120
121 impl VhostUserSlaveReqHandlerMut for VsockBackend {
protocol(&self) -> Protocol122 fn protocol(&self) -> Protocol {
123 match self.handler_type {
124 HandlerType::VhostUser => Protocol::Regular,
125 HandlerType::Vvu { .. } => Protocol::Virtio,
126 }
127 }
128
/// Forwards the set-owner request to the `Vsock` handle, converting any `vhost::Error`
/// with `convert_vhost_error`.
fn set_owner(&mut self) -> Result<()> {
    self.handle.set_owner().map_err(convert_vhost_error)
}
132
/// Forwards the reset-owner request to the `Vsock` handle, converting any `vhost::Error`
/// with `convert_vhost_error`.
fn reset_owner(&mut self) -> Result<()> {
    self.handle.reset_owner().map_err(convert_vhost_error)
}
136
get_features(&mut self) -> Result<u64>137 fn get_features(&mut self) -> Result<u64> {
138 let features = base_features(ProtectionType::Unprotected)
139 | self.features
140 | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
141 Ok(features)
142 }
143
set_features(&mut self, features: u64) -> Result<()>144 fn set_features(&mut self, features: u64) -> Result<()> {
145 self.handle
146 .set_features(features & self.features)
147 .map_err(convert_vhost_error)
148 }
149
/// Returns the protocol features chosen when the backend was created.
fn get_protocol_features(&mut self) -> Result<VhostUserProtocolFeatures> {
    Ok(self.protocol_features)
}
153
set_protocol_features(&mut self, features: u64) -> Result<()>154 fn set_protocol_features(&mut self, features: u64) -> Result<()> {
155 let unrequested_features = features & !self.protocol_features.bits();
156 if unrequested_features != 0 {
157 Err(Error::InvalidParam)
158 } else {
159 Ok(())
160 }
161 }
162
set_mem_table( &mut self, contexts: &[VhostUserMemoryRegion], files: Vec<File>, ) -> Result<()>163 fn set_mem_table(
164 &mut self,
165 contexts: &[VhostUserMemoryRegion],
166 files: Vec<File>,
167 ) -> Result<()> {
168 let (guest_mem, vmm_maps) = match &self.handler_type {
169 HandlerType::VhostUser => create_guest_memory(contexts, files)?,
170 HandlerType::Vvu { vfio_dev, caps, .. } => {
171 // virtio-vhost-user doesn't pass FDs.
172 if !files.is_empty() {
173 return Err(Error::InvalidParam);
174 }
175 create_vvu_guest_memory(vfio_dev.as_ref(), caps.shared_mem_cfg_addr(), contexts)?
176 }
177 };
178
179 self.handle
180 .set_mem_table(&guest_mem)
181 .map_err(convert_vhost_error)?;
182
183 self.mem = Some(guest_mem);
184 self.vmm_maps = Some(vmm_maps);
185
186 Ok(())
187 }
188
/// Returns the number of queues this backend exposes (`NUM_QUEUES`).
fn get_queue_num(&mut self) -> Result<u64> {
    Ok(NUM_QUEUES as u64)
}
192
set_vring_num(&mut self, index: u32, num: u32) -> Result<()>193 fn set_vring_num(&mut self, index: u32, num: u32) -> Result<()> {
194 if index >= NUM_QUEUES as u32 || num == 0 || num > vsock::QUEUE_SIZE.into() {
195 return Err(Error::InvalidParam);
196 }
197
198 // We checked these values already.
199 let index = index as usize;
200 let num = num as u16;
201 self.queues[index].size = num;
202
203 // The last vq is an event-only vq that is not handled by the kernel.
204 if index == EVENT_QUEUE {
205 return Ok(());
206 }
207
208 self.handle
209 .set_vring_num(index, num)
210 .map_err(convert_vhost_error)
211 }
212
set_vring_addr( &mut self, index: u32, flags: VhostUserVringAddrFlags, descriptor: u64, used: u64, available: u64, log: u64, ) -> Result<()>213 fn set_vring_addr(
214 &mut self,
215 index: u32,
216 flags: VhostUserVringAddrFlags,
217 descriptor: u64,
218 used: u64,
219 available: u64,
220 log: u64,
221 ) -> Result<()> {
222 if index >= NUM_QUEUES as u32 {
223 return Err(Error::InvalidParam);
224 }
225
226 let index = index as usize;
227
228 let mem = self.mem.as_ref().ok_or(Error::InvalidParam)?;
229 let maps = self.vmm_maps.as_ref().ok_or(Error::InvalidParam)?;
230
231 let mut queue = &mut self.queues[index];
232 queue.desc_table = vmm_va_to_gpa(maps, descriptor)?;
233 queue.avail_ring = vmm_va_to_gpa(maps, available)?;
234 queue.used_ring = vmm_va_to_gpa(maps, used)?;
235 let log_addr = if flags.contains(VhostUserVringAddrFlags::VHOST_VRING_F_LOG) {
236 vmm_va_to_gpa(maps, log).map(Some)?
237 } else {
238 None
239 };
240
241 if index == EVENT_QUEUE {
242 return Ok(());
243 }
244
245 self.handle
246 .set_vring_addr(
247 mem,
248 queue.max_size,
249 queue.actual_size(),
250 index,
251 flags.bits(),
252 queue.desc_table,
253 queue.used_ring,
254 queue.avail_ring,
255 log_addr,
256 )
257 .map_err(convert_vhost_error)
258 }
259
set_vring_base(&mut self, index: u32, base: u32) -> Result<()>260 fn set_vring_base(&mut self, index: u32, base: u32) -> Result<()> {
261 if index >= NUM_QUEUES as u32 || base >= vsock::QUEUE_SIZE.into() {
262 return Err(Error::InvalidParam);
263 }
264
265 let index = index as usize;
266 let base = base as u16;
267
268 let mut queue = &mut self.queues[index];
269 queue.next_avail = Wrapping(base);
270 queue.next_used = Wrapping(base);
271
272 if index == EVENT_QUEUE {
273 return Ok(());
274 }
275
276 self.handle
277 .set_vring_base(index, base)
278 .map_err(convert_vhost_error)
279 }
280
get_vring_base(&mut self, index: u32) -> Result<VhostUserVringState>281 fn get_vring_base(&mut self, index: u32) -> Result<VhostUserVringState> {
282 if index >= NUM_QUEUES as u32 {
283 return Err(Error::InvalidParam);
284 }
285
286 let index = index as usize;
287 let next_avail = if index == EVENT_QUEUE {
288 self.queues[index].next_avail.0
289 } else {
290 self.handle
291 .get_vring_base(index)
292 .map_err(convert_vhost_error)?
293 };
294
295 Ok(VhostUserVringState::new(index as u32, next_avail.into()))
296 }
297
set_vring_kick(&mut self, index: u8, fd: Option<File>) -> Result<()>298 fn set_vring_kick(&mut self, index: u8, fd: Option<File>) -> Result<()> {
299 if index >= NUM_QUEUES as u8 {
300 return Err(Error::InvalidParam);
301 }
302
303 let index = usize::from(index);
304 let event = match &self.handler_type {
305 HandlerType::Vvu {
306 notification_evts, ..
307 } => {
308 if fd.is_some() {
309 return Err(Error::InvalidParam);
310 }
311 let queue = &mut self.queues[index];
312 if queue.ready {
313 error!("kick fd cannot replaced after queue is started");
314 return Err(Error::InvalidOperation);
315 }
316
317 notification_evts[index].try_clone().map_err(|e| {
318 error!("failed to clone notification_evts[{}]: {}", index, e);
319 Error::InvalidOperation
320 })?
321 }
322 HandlerType::VhostUser => {
323 let file = fd.ok_or(Error::InvalidParam)?;
324
325 // Safe because the descriptor is uniquely owned by `file`.
326 let event = unsafe { Event::from_raw_descriptor(file.into_raw_descriptor()) };
327
328 // Remove O_NONBLOCK from the kick fd.
329 if let Err(e) = clear_fd_flags(event.as_raw_descriptor(), libc::O_NONBLOCK) {
330 error!("failed to remove O_NONBLOCK for kick fd: {}", e);
331 return Err(Error::InvalidParam);
332 }
333
334 event
335 }
336 };
337
338 if index != EVENT_QUEUE {
339 self.handle
340 .set_vring_kick(index, &event)
341 .map_err(convert_vhost_error)?;
342 }
343
344 Ok(())
345 }
346
set_vring_call(&mut self, index: u8, fd: Option<File>) -> Result<()>347 fn set_vring_call(&mut self, index: u8, fd: Option<File>) -> Result<()> {
348 if index >= NUM_QUEUES as u8 {
349 return Err(Error::InvalidParam);
350 }
351
352 let index = usize::from(index);
353 let event = match &self.handler_type {
354 HandlerType::Vvu { vfio_dev, caps, .. } => {
355 let vfio = Arc::clone(vfio_dev);
356 let base = caps.doorbell_base_addr();
357 let addr = VfioRegionAddr {
358 index: base.index,
359 addr: base.addr + (index as u64 * caps.doorbell_off_multiplier() as u64),
360 };
361
362 let doorbell = DoorbellRegion {
363 vfio,
364 index: index as u8,
365 addr,
366 };
367 let call_evt = match self.call_evts[index].as_ref() {
368 None => {
369 let evt = Arc::new(Mutex::new(doorbell));
370 self.call_evts[index] = Some(evt.clone());
371 evt
372 }
373 Some(evt) => {
374 *evt.lock() = doorbell;
375 evt.clone()
376 }
377 };
378
379 let kernel_evt = Event::new().map_err(|_| Error::SlaveInternalError)?;
380 let task_evt = EventAsync::new(
381 kernel_evt.try_clone().expect("failed to clone event").0,
382 &self.ex,
383 )
384 .map_err(|_| Error::SlaveInternalError)?;
385 self.ex
386 .spawn_local(async move {
387 loop {
388 let _ = task_evt
389 .next_val()
390 .await
391 .expect("failed to wait for event fd");
392 call_evt.signal_used_queue(index as u16);
393 }
394 })
395 .detach();
396 kernel_evt
397 }
398 HandlerType::VhostUser => {
399 let file = fd.ok_or(Error::InvalidParam)?;
400 // Safe because the descriptor is uniquely owned by `file`.
401 unsafe { Event::from_raw_descriptor(file.into_raw_descriptor()) }
402 }
403 };
404 if index != EVENT_QUEUE {
405 self.handle
406 .set_vring_call(index, &event)
407 .map_err(convert_vhost_error)?;
408 }
409
410 Ok(())
411 }
412
set_vring_err(&mut self, index: u8, fd: Option<File>) -> Result<()>413 fn set_vring_err(&mut self, index: u8, fd: Option<File>) -> Result<()> {
414 if index >= NUM_QUEUES as u8 {
415 return Err(Error::InvalidParam);
416 }
417
418 let index = usize::from(index);
419 let file = fd.ok_or(Error::InvalidParam)?;
420
421 // Safe because the descriptor is uniquely owned by `file`.
422 let event = unsafe { Event::from_raw_descriptor(file.into_raw_descriptor()) };
423
424 if index == EVENT_QUEUE {
425 return Ok(());
426 }
427
428 self.handle
429 .set_vring_err(index, &event)
430 .map_err(convert_vhost_error)
431 }
432
set_vring_enable(&mut self, index: u32, enable: bool) -> Result<()>433 fn set_vring_enable(&mut self, index: u32, enable: bool) -> Result<()> {
434 if index >= NUM_QUEUES as u32 {
435 return Err(Error::InvalidParam);
436 }
437
438 self.queues[index as usize].ready = enable;
439
440 if index == (EVENT_QUEUE) as u32 {
441 return Ok(());
442 }
443
444 if self.queues[..EVENT_QUEUE].iter().all(|q| q.ready) {
445 // All queues are ready. Start the device.
446 self.handle.set_cid(self.cid).map_err(convert_vhost_error)?;
447 self.handle.start().map_err(convert_vhost_error)
448 } else if !enable {
449 // If we just disabled a vring then stop the device.
450 self.handle.stop().map_err(convert_vhost_error)
451 } else {
452 Ok(())
453 }
454 }
455
get_config( &mut self, offset: u32, size: u32, _flags: VhostUserConfigFlags, ) -> Result<Vec<u8>>456 fn get_config(
457 &mut self,
458 offset: u32,
459 size: u32,
460 _flags: VhostUserConfigFlags,
461 ) -> Result<Vec<u8>> {
462 let start: usize = offset.try_into().map_err(|_| Error::InvalidParam)?;
463 let end: usize = offset
464 .checked_add(size)
465 .and_then(|e| e.try_into().ok())
466 .ok_or(Error::InvalidParam)?;
467
468 if start >= size_of::<Le64>() || end > size_of::<Le64>() {
469 return Err(Error::InvalidParam);
470 }
471
472 Ok(Le64::from(self.cid).as_slice()[start..end].to_vec())
473 }
474
/// Writing the configuration is not supported; always returns `Error::InvalidOperation`.
fn set_config(
    &mut self,
    _offset: u32,
    _buf: &[u8],
    _flags: VhostUserConfigFlags,
) -> Result<()> {
    Err(Error::InvalidOperation)
}
483
/// Accepts the request file and does nothing with it; `_vu_req` is dropped on return.
fn set_slave_req_fd(&mut self, _vu_req: File) {}
485
/// Inflight tracking is not supported; always returns `Error::InvalidOperation`.
fn get_inflight_fd(
    &mut self,
    _inflight: &VhostUserInflight,
) -> Result<(VhostUserInflight, File)> {
    Err(Error::InvalidOperation)
}
492
/// Inflight tracking is not supported; always returns `Error::InvalidOperation`.
fn set_inflight_fd(&mut self, _inflight: &VhostUserInflight, _file: File) -> Result<()> {
    Err(Error::InvalidOperation)
}
496
/// Not supported by this backend; always returns `Error::InvalidOperation`.
fn get_max_mem_slots(&mut self) -> Result<u64> {
    Err(Error::InvalidOperation)
}
500
/// Adding a single memory region is not supported; always returns
/// `Error::InvalidOperation`.
fn add_mem_region(&mut self, _region: &VhostUserSingleMemoryRegion, _fd: File) -> Result<()> {
    Err(Error::InvalidOperation)
}
504
/// Removing a single memory region is not supported; always returns
/// `Error::InvalidOperation`.
fn remove_mem_region(&mut self, _region: &VhostUserSingleMemoryRegion) -> Result<()> {
    Err(Error::InvalidOperation)
}
508 }
509
run_device<P: AsRef<Path>>( ex: &Executor, socket: P, backend: Arc<StdMutex<VsockBackend>>, ) -> anyhow::Result<()>510 async fn run_device<P: AsRef<Path>>(
511 ex: &Executor,
512 socket: P,
513 backend: Arc<StdMutex<VsockBackend>>,
514 ) -> anyhow::Result<()> {
515 let listener = UnixListener::bind(socket)
516 .map(UnlinkUnixListener)
517 .context("failed to bind socket")?;
518 let (socket, _) = ex
519 .spawn_blocking(move || listener.accept())
520 .await
521 .context("failed to accept socket connection")?;
522
523 let mut req_handler = SlaveReqHandler::from_stream(socket, backend);
524 let h = SafeDescriptor::try_from(&req_handler as &dyn AsRawDescriptor)
525 .map(AsyncWrapper::new)
526 .expect("failed to get safe descriptor for handler");
527 let handler_source = ex.async_from(h).context("failed to create async handler")?;
528
529 loop {
530 handler_source
531 .wait_readable()
532 .await
533 .context("failed to wait for vhost socket to become readable")?;
534 match req_handler.handle_request() {
535 Ok(()) => (),
536 Err(Error::Disconnect) => {
537 info!("vhost-user connection closed");
538 // Exit as the client closed the connection.
539 return Ok(());
540 }
541 Err(e) => {
542 bail!("failed to handle a vhost-user request: {}", e);
543 }
544 };
545 }
546 }
547
// Command-line options for the vsock device. Exactly one of `socket` and `vfio` must be
// given; `run_vsock_device` rejects any other combination.
#[derive(FromArgs)]
#[argh(description = "")]
struct Options {
    // Selects vhost-user mode: the Unix socket path to listen on.
    #[argh(
        option,
        description = "path to bind a listening vhost-user socket",
        arg_name = "PATH"
    )]
    socket: Option<String>,
    // Selects virtio-vhost-user (vvu) mode: the name of the VFIO PCI device to use.
    #[argh(option, description = "name of vfio pci device", arg_name = "STRING")]
    vfio: Option<String>,
    // Required: the context id passed to `VsockBackend::new`.
    #[argh(
        option,
        description = "the vsock context id for this device",
        arg_name = "INT"
    )]
    cid: u64,
    // Path opened by `VsockBackend::new`; defaults to `/dev/vhost-vsock`.
    #[argh(
        option,
        description = "path to the vhost-vsock control socket",
        default = "String::from(\"/dev/vhost-vsock\")",
        arg_name = "PATH"
    )]
    vhost_socket: String,
}
573
run_vvu_device<P: AsRef<Path>>( ex: &Executor, cid: u64, vhost_socket: P, device_name: &str, ) -> anyhow::Result<()>574 fn run_vvu_device<P: AsRef<Path>>(
575 ex: &Executor,
576 cid: u64,
577 vhost_socket: P,
578 device_name: &str,
579 ) -> anyhow::Result<()> {
580 let mut device =
581 VvuPciDevice::new(device_name, NUM_QUEUES).context("failed to create `VvuPciDevice`")?;
582 let backend = VsockBackend::new(
583 ex,
584 cid,
585 vhost_socket,
586 HandlerType::Vvu {
587 vfio_dev: Arc::clone(&device.vfio_dev),
588 caps: device.caps.clone(),
589 notification_evts: std::mem::take(&mut device.notification_evts),
590 },
591 )
592 .map(StdMutex::new)
593 .map(Arc::new)
594 .context("failed to create `VsockBackend`")?;
595 let driver = VvuDevice::new(device);
596
597 let mut listener = VfioListener::new(driver)
598 .context("failed to create `VfioListener`")
599 .and_then(|l| {
600 SlaveListener::<VfioEndpoint<_, _>, _>::new(l, backend)
601 .context("failed to create `SlaveListener`")
602 })?;
603 let mut req_handler = listener
604 .accept()
605 .context("failed to accept vfio connection")?
606 .expect("no incoming connection detected");
607 let h = SafeDescriptor::try_from(&req_handler as &dyn AsRawDescriptor)
608 .map(AsyncWrapper::new)
609 .expect("failed to get safe descriptor for handler");
610 let handler_source = ex
611 .async_from(h)
612 .context("failed to create async handler source")?;
613
614 let done = async move {
615 loop {
616 let count = handler_source
617 .read_u64()
618 .await
619 .context("failed to wait for handler source")?;
620 for _ in 0..count {
621 req_handler
622 .handle_request()
623 .context("failed to handle request")?;
624 }
625 }
626 };
627 match ex.run_until(done) {
628 Ok(Ok(())) => Ok(()),
629 Ok(Err(e)) => Err(e),
630 Err(e) => Err(e).context("executor error"),
631 }
632 }
633
634 /// Returns an error if the given `args` is invalid or the device fails to run.
run_vsock_device(program_name: &str, args: &[&str]) -> anyhow::Result<()>635 pub fn run_vsock_device(program_name: &str, args: &[&str]) -> anyhow::Result<()> {
636 let opts = match Options::from_args(&[program_name], args) {
637 Ok(opts) => opts,
638 Err(e) => {
639 if e.status.is_err() {
640 bail!(e.output);
641 } else {
642 println!("{}", e.output);
643 }
644 return Ok(());
645 }
646 };
647
648 let ex = Executor::new().context("failed to create executor")?;
649
650 match (opts.socket, opts.vfio) {
651 (Some(socket), None) => {
652 let backend =
653 VsockBackend::new(&ex, opts.cid, opts.vhost_socket, HandlerType::VhostUser)
654 .map(StdMutex::new)
655 .map(Arc::new)?;
656
657 // TODO: Replace the `and_then` with `Result::flatten` once it is stabilized.
658 ex.run_until(run_device(&ex, socket, backend))
659 .context("failed to run vsock device")
660 .and_then(convert::identity)
661 }
662 (None, Some(device_name)) => run_vvu_device(&ex, opts.cid, opts.vhost_socket, &device_name),
663 _ => bail!("Exactly one of `--socket` or `--vfio` is required"),
664 }
665 }
666