// Copyright 2021 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::{cell::RefCell, collections::BTreeMap, fs::File, path::PathBuf, rc::Rc, sync::Arc};

use anyhow::{anyhow, bail, Context};
use argh::FromArgs;
use async_task::Task;
use base::{
    clone_descriptor, error, warn, Event, FromRawDescriptor, IntoRawDescriptor, SafeDescriptor,
    Tube, UnixSeqpacketListener, UnlinkUnixSeqpacketListener,
};
use cros_async::{AsyncTube, AsyncWrapper, EventAsync, Executor, IoSourceExt};
use hypervisor::ProtectionType;
use sync::Mutex;
use vm_memory::GuestMemory;
use vmm_vhost::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};

use crate::virtio::{
    self, gpu,
    vhost::user::device::handler::{DeviceRequestHandler, Doorbell, VhostUserBackend},
    vhost::user::device::wl::parse_wayland_sock,
    DescriptorChain, Gpu, GpuDisplayParameters, GpuParameters, Queue, QueueReader, VirtioDevice,
};

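/// Shared handle to the control queue used by both the queue worker and the fence handler.
/// Access is serialized by the inner `Mutex`, and used descriptors are signaled to the guest
/// through the shared `Doorbell`.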
#[derive(Clone)]
struct SharedReader {
    queue: Arc<Mutex<Queue>>,
    doorbell: Arc<Mutex<Doorbell>>,
}

impl gpu::QueueReader for SharedReader {
    fn pop(&self, mem: &GuestMemory) -> Option<DescriptorChain> {
        self.queue.lock().pop(mem)
    }

    fn add_used(&self, mem: &GuestMemory, desc_index: u16, len: u32) {
        self.queue.lock().add_used(mem, desc_index, len)
    }

    fn signal_used(&self, mem: &GuestMemory) {
        self.queue.lock().trigger_interrupt(mem, &self.doorbell);
    }
}

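/// Worker for the virtio-gpu control queue: waits for kick events, lets the GPU frontend
/// process any available descriptors, and signals the guest when an interrupt is needed.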
async fn run_ctrl_queue(
    reader: SharedReader,
    mem: GuestMemory,
    kick_evt: EventAsync,
    state: Rc<RefCell<gpu::Frontend>>,
) {
    loop {
        if let Err(e) = kick_evt.next_val().await {
            error!("Failed to read kick event for ctrl queue: {}", e);
        }

        let mut state = state.borrow_mut();
        let needs_interrupt = state.process_queue(&mem, &reader);

        if needs_interrupt {
            reader.signal_used(&mem);
        }
    }
}

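/// Worker that drives the GPU display: waits for the display descriptor to become readable and
/// processes its events, exiting when `process_display` returns true or waiting fails.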
async fn run_display(
    display: Box<dyn IoSourceExt<AsyncWrapper<SafeDescriptor>>>,
    state: Rc<RefCell<gpu::Frontend>>,
) {
    loop {
        if let Err(e) = display.wait_readable().await {
            error!(
                "Failed to wait for display context to become readable: {}",
                e
            );
            break;
        }

        if state.borrow_mut().process_display() {
            break;
        }
    }
}

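/// Worker that services a single resource bridge, forwarding requests from the tube to the GPU
/// frontend until waiting or processing fails.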
async fn run_resource_bridge(tube: Box<dyn IoSourceExt<Tube>>, state: Rc<RefCell<gpu::Frontend>>) {
    loop {
        if let Err(e) = tube.wait_readable().await {
            error!(
                "Failed to wait for resource bridge tube to become readable: {}",
                e
            );
            break;
        }

        if let Err(e) = state.borrow_mut().process_resource_bridge(tube.as_source()) {
            error!("Failed to process resource bridge: {:#}", e);
            break;
        }
    }
}

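/// Cancels `task` without blocking the caller by spawning the cancellation onto the executor.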
fn cancel_task<R: 'static>(ex: &Executor, task: Task<R>) {
    ex.spawn_local(task.cancel()).detach()
}

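/// vhost-user backend for the virtio-gpu device: owns the `Gpu` implementation, any pending
/// resource bridges, and the worker tasks driving the queues and display.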
struct GpuBackend {
    ex: Executor,
    gpu: Rc<RefCell<Gpu>>,
    resource_bridges: Arc<Mutex<Vec<Tube>>>,
    acked_protocol_features: u64,
    state: Option<Rc<RefCell<gpu::Frontend>>>,
    fence_state: Arc<Mutex<gpu::FenceState>>,
    display_worker: Option<Task<()>>,
    workers: [Option<Task<()>>; Self::MAX_QUEUE_NUM],
}

impl VhostUserBackend for GpuBackend {
    const MAX_QUEUE_NUM: usize = gpu::QUEUE_SIZES.len();
    const MAX_VRING_LEN: u16 = gpu::QUEUE_SIZES[0];

    type Error = anyhow::Error;

    fn features(&self) -> u64 {
        self.gpu.borrow().features() | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
    }

    fn ack_features(&mut self, value: u64) -> anyhow::Result<()> {
        self.gpu.borrow_mut().ack_features(value);
        Ok(())
    }

    fn acked_features(&self) -> u64 {
        self.features()
    }

    fn protocol_features(&self) -> VhostUserProtocolFeatures {
        VhostUserProtocolFeatures::CONFIG
            | VhostUserProtocolFeatures::SLAVE_REQ
            | VhostUserProtocolFeatures::MQ
    }

    fn ack_protocol_features(&mut self, features: u64) -> anyhow::Result<()> {
        let unrequested_features = features & !self.protocol_features().bits();
        if unrequested_features != 0 {
            bail!("Unexpected protocol features: {:#x}", unrequested_features);
        }

        self.acked_protocol_features |= features;
        Ok(())
    }

    fn acked_protocol_features(&self) -> u64 {
        self.acked_protocol_features
    }

    fn read_config(&self, offset: u64, dst: &mut [u8]) {
        self.gpu.borrow().read_config(offset, dst)
    }

    fn write_config(&self, offset: u64, data: &[u8]) {
        self.gpu.borrow_mut().write_config(offset, data)
    }

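    // Exchanges setup messages with the main crosvm process over the device request channel:
    // receive the PCI address, reply with the BAR configuration, then receive the device tube.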
    fn set_device_request_channel(&mut self, channel: File) -> anyhow::Result<()> {
        let tube = AsyncTube::new(&self.ex, unsafe {
            Tube::from_raw_descriptor(channel.into_raw_descriptor())
        })
        .context("failed to create AsyncTube")?;

        // We need a PciAddress in order to initialize the pci bar but this isn't part of the
        // vhost-user protocol. Instead we expect this to be the first message the crosvm main
        // process sends us on the device tube.
        let gpu = Rc::clone(&self.gpu);
        self.ex
            .spawn_local(async move {
                let response = match tube.next().await {
                    Ok(addr) => gpu.borrow_mut().get_device_bars(addr),
                    Err(e) => {
                        error!("Failed to get `PciAddr` from tube: {}", e);
                        return;
                    }
                };

                if let Err(e) = tube.send(response).await {
                    error!("Failed to send `PciBarConfiguration`: {}", e);
                }

                let device_tube: Tube = match tube.next().await {
                    Ok(tube) => tube,
                    Err(e) => {
                        error!("Failed to get device tube: {}", e);
                        return;
                    }
                };

                gpu.borrow_mut().set_device_tube(device_tube);
            })
            .detach();

        Ok(())
    }

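    // Only the ctrl queue (index 0) gets a worker; the cursor queue is accepted but ignored.
    // The GPU frontend, resource bridge workers, and display worker are started lazily the
    // first time a queue is brought up.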
    fn start_queue(
        &mut self,
        idx: usize,
        queue: Queue,
        mem: GuestMemory,
        doorbell: Arc<Mutex<Doorbell>>,
        kick_evt: Event,
    ) -> anyhow::Result<()> {
        if let Some(task) = self.workers.get_mut(idx).and_then(Option::take) {
            warn!("Starting new queue handler without stopping old handler");
            cancel_task(&self.ex, task);
        }

        match idx {
            // ctrl queue.
            0 => {}
            // We don't currently handle the cursor queue.
            1 => return Ok(()),
            _ => bail!("attempted to start unknown queue: {}", idx),
        }

        let kick_evt = EventAsync::new(kick_evt.0, &self.ex)
            .context("failed to create EventAsync for kick_evt")?;

        let reader = SharedReader {
            queue: Arc::new(Mutex::new(queue)),
            doorbell,
        };

        let state = if let Some(s) = self.state.as_ref() {
            s.clone()
        } else {
            let fence_handler =
                gpu::create_fence_handler(mem.clone(), reader.clone(), self.fence_state.clone());
            let state = Rc::new(RefCell::new(
                self.gpu
                    .borrow_mut()
                    .initialize_frontend(self.fence_state.clone(), fence_handler)
                    .ok_or_else(|| anyhow!("failed to initialize gpu frontend"))?,
            ));
            self.state = Some(state.clone());
            state
        };

        // Start handling the resource bridges if we haven't already.
        for bridge in self.resource_bridges.lock().drain(..) {
            let tube = self
                .ex
                .async_from(bridge)
                .context("failed to create async tube")?;
            self.ex
                .spawn_local(run_resource_bridge(tube, state.clone()))
                .detach();
        }

        // Start handling the display, if we haven't already.
        if self.display_worker.is_none() {
            let display = clone_descriptor(&*state.borrow_mut().display().borrow())
                .map(|fd| {
                    // Safe because we just created this fd.
                    AsyncWrapper::new(unsafe { SafeDescriptor::from_raw_descriptor(fd) })
                })
                .context("failed to clone inner WaitContext for gpu display")
                .and_then(|ctx| {
                    self.ex
                        .async_from(ctx)
                        .context("failed to create async WaitContext")
                })?;

            let task = self.ex.spawn_local(run_display(display, state.clone()));
            self.display_worker = Some(task);
        }

        let task = self
            .ex
            .spawn_local(run_ctrl_queue(reader, mem, kick_evt, state));
        self.workers[idx] = Some(task);
        Ok(())
    }

    fn stop_queue(&mut self, idx: usize) {
        if let Some(task) = self.workers.get_mut(idx).and_then(Option::take) {
            cancel_task(&self.ex, task)
        }
    }

    fn reset(&mut self) {
        if let Some(task) = self.display_worker.take() {
            cancel_task(&self.ex, task)
        }

        for task in self.workers.iter_mut().filter_map(Option::take) {
            cancel_task(&self.ex, task)
        }
    }
}

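/// Parses the JSON value of the `--params` flag into `GpuParameters`.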
fn gpu_parameters_from_str(input: &str) -> Result<GpuParameters, String> {
    serde_json::from_str(input).map_err(|e| e.to_string())
}

#[derive(FromArgs)]
#[argh(description = "run gpu device")]
struct Options {
    #[argh(
        option,
        description = "path to bind a listening vhost-user socket",
        arg_name = "PATH"
    )]
    socket: String,
    #[argh(
        option,
        description = "path to one or more Wayland sockets. The unnamed socket is \
        used for displaying virtual screens while the named ones are used for IPC",
        from_str_fn(parse_wayland_sock),
        arg_name = "PATH[,name=NAME]"
    )]
    wayland_sock: Vec<(String, PathBuf)>,
    #[argh(
        option,
        description = "path to one or more bridge sockets for communicating with \
        other graphics devices (wayland, video, etc)",
        arg_name = "PATH"
    )]
    resource_bridge: Vec<String>,
    #[argh(option, description = "X11 display name to use", arg_name = "DISPLAY")]
    x_display: Option<String>,
    #[argh(
        option,
        from_str_fn(gpu_parameters_from_str),
        default = "Default::default()",
        description = "a JSON object of virtio-gpu parameters",
        arg_name = "JSON"
    )]
    params: GpuParameters,
}

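/// Entry point for the vhost-user GPU device: parses `args`, builds the `Gpu` device and its
/// resource bridge listeners, and services vhost-user requests on `socket` until the handler
/// finishes.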
pub fn run_gpu_device(program_name: &str, args: &[&str]) -> anyhow::Result<()> {
    let Options {
        x_display,
        params: mut gpu_parameters,
        resource_bridge,
        socket,
        wayland_sock,
    } = match Options::from_args(&[program_name], args) {
        Ok(opts) => opts,
        Err(e) => {
            if e.status.is_err() {
                bail!(e.output);
            } else {
                println!("{}", e.output);
            }
            return Ok(());
        }
    };

    base::syslog::init().context("failed to initialize syslog")?;

    let wayland_paths: BTreeMap<_, _> = wayland_sock.into_iter().collect();

    let resource_bridge_listeners = resource_bridge
        .into_iter()
        .map(|p| {
            UnixSeqpacketListener::bind(&p)
                .map(UnlinkUnixSeqpacketListener)
                .with_context(|| format!("failed to bind socket at path {}", p))
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    if gpu_parameters.displays.is_empty() {
        gpu_parameters
            .displays
            .push(GpuDisplayParameters::default());
    }

    let ex = Executor::new().context("failed to create executor")?;

    // We don't know the order in which other devices are going to connect to the resource
    // bridges, so start listening for all of them on separate threads. Any devices that connect
    // after the gpu device starts its queues will not have their resource bridges processed. In
    // practice this should be fine since the devices that use the resource bridge always try to
    // connect to the gpu device before handling messages from the VM.
    let resource_bridges = Arc::new(Mutex::new(Vec::with_capacity(
        resource_bridge_listeners.len(),
    )));
    for listener in resource_bridge_listeners {
        let resource_bridges = Arc::clone(&resource_bridges);
        ex.spawn_blocking(move || match listener.accept() {
            Ok(stream) => resource_bridges.lock().push(Tube::new(stream)),
            Err(e) => {
                let path = listener
                    .path()
                    .unwrap_or_else(|_| PathBuf::from("{unknown}"));
                error!(
                    "Failed to accept resource bridge connection for socket {}: {}",
                    path.display(),
                    e
                );
            }
        })
        .detach();
    }

    let exit_evt = Event::new().context("failed to create Event")?;

    // Initialized later.
    let gpu_device_tube = None;

    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];
    if let Some(p) = wayland_paths.get("") {
        display_backends.insert(0, virtio::DisplayBackend::Wayland(Some(p.to_owned())));
    }

    // These are only used when there is an input device.
    let event_devices = Vec::new();

    // This is only used in single-process mode, even for the regular gpu device.
    let map_request = Arc::new(Mutex::new(None));

    // The regular gpu device sets this to true when sandboxing is enabled. Assume that we
    // are always sandboxed.
    let external_blob = true;
    let base_features = virtio::base_features(ProtectionType::Unprotected);
    let channels = wayland_paths;

    let gpu = Rc::new(RefCell::new(Gpu::new(
        exit_evt,
        gpu_device_tube,
        Vec::new(), // resource_bridges, handled separately by us
        display_backends,
        &gpu_parameters,
        None,
        event_devices,
        map_request,
        external_blob,
        base_features,
        channels,
    )));

    let backend = GpuBackend {
        ex: ex.clone(),
        gpu,
        resource_bridges,
        acked_protocol_features: 0,
        state: None,
        fence_state: Default::default(),
        display_worker: None,
        workers: Default::default(),
    };

    let handler = DeviceRequestHandler::new(backend);
    // run_until() returns a Result<Result<..>>, which the ? operator lets us flatten.
    ex.run_until(handler.run(socket, &ex))?
}