• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! Handles IPC for controlling the main VM process.
6 //!
7 //! The VM Control IPC protocol is synchronous, meaning that each `VmRequest` sent over a connection
8 //! will receive a `VmResponse` for that request next time data is received over that connection.
9 //!
10 //! The wire message format is a little-endian C-struct of fixed size, along with a file descriptor
11 //! if the request type expects one.
12 
13 pub mod api;
14 #[cfg(feature = "gdb")]
15 pub mod gdb;
16 #[cfg(feature = "gpu")]
17 pub mod gpu;
18 
19 use base::debug;
20 #[cfg(any(target_os = "android", target_os = "linux"))]
21 use base::linux::MemoryMappingBuilderUnix;
22 #[cfg(any(target_os = "android", target_os = "linux"))]
23 use base::sys::call_with_extended_max_files;
24 #[cfg(any(target_os = "android", target_os = "linux"))]
25 use base::MemoryMappingArena;
26 #[cfg(windows)]
27 use base::MemoryMappingBuilderWindows;
28 use hypervisor::BalloonEvent;
29 use hypervisor::MemCacheType;
30 use hypervisor::MemRegion;
31 use snapshot::AnySnapshot;
32 
33 #[cfg(feature = "balloon")]
34 mod balloon_tube;
35 pub mod client;
36 pub mod sys;
37 
38 #[cfg(target_arch = "x86_64")]
39 use std::arch::x86_64::_rdtsc;
40 use std::collections::BTreeMap;
41 use std::collections::BTreeSet;
42 use std::collections::HashMap;
43 use std::convert::TryInto;
44 use std::fmt;
45 use std::fmt::Display;
46 use std::fs::File;
47 use std::path::Path;
48 use std::path::PathBuf;
49 use std::result::Result as StdResult;
50 use std::str::FromStr;
51 use std::sync::mpsc;
52 use std::sync::Arc;
53 use std::time::Instant;
54 
55 use anyhow::bail;
56 use anyhow::Context;
57 use base::error;
58 use base::info;
59 use base::warn;
60 use base::with_as_descriptor;
61 use base::AsRawDescriptor;
62 use base::Descriptor;
63 use base::Error as SysError;
64 use base::Event;
65 use base::ExternalMapping;
66 use base::IntoRawDescriptor;
67 use base::MappedRegion;
68 use base::MemoryMappingBuilder;
69 use base::MmapError;
70 use base::Protection;
71 use base::Result;
72 use base::SafeDescriptor;
73 use base::SharedMemory;
74 use base::Tube;
75 use hypervisor::Datamatch;
76 use hypervisor::IoEventAddress;
77 use hypervisor::IrqRoute;
78 use hypervisor::IrqSource;
79 pub use hypervisor::MemSlot;
80 use hypervisor::Vm;
81 use hypervisor::VmCap;
82 use libc::EINVAL;
83 use libc::EIO;
84 use libc::ENODEV;
85 use libc::ENOTSUP;
86 use libc::ERANGE;
87 #[cfg(feature = "registered_events")]
88 use protos::registered_events;
89 use remain::sorted;
90 use resources::Alloc;
91 use resources::SystemAllocator;
92 use rutabaga_gfx::DeviceId;
93 use rutabaga_gfx::RutabagaDescriptor;
94 use rutabaga_gfx::RutabagaFromRawDescriptor;
95 use rutabaga_gfx::RutabagaGralloc;
96 use rutabaga_gfx::RutabagaHandle;
97 use rutabaga_gfx::RutabagaMappedRegion;
98 use rutabaga_gfx::VulkanInfo;
99 use serde::Deserialize;
100 use serde::Serialize;
101 use snapshot::SnapshotReader;
102 use snapshot::SnapshotWriter;
103 use swap::SwapStatus;
104 use sync::Mutex;
105 #[cfg(any(target_os = "android", target_os = "linux"))]
106 pub use sys::FsMappingRequest;
107 #[cfg(windows)]
108 pub use sys::InitialAudioSessionState;
109 #[cfg(any(target_os = "android", target_os = "linux"))]
110 pub use sys::VmMemoryMappingRequest;
111 #[cfg(any(target_os = "android", target_os = "linux"))]
112 pub use sys::VmMemoryMappingResponse;
113 use thiserror::Error;
114 pub use vm_control_product::GpuSendToMain;
115 pub use vm_control_product::GpuSendToService;
116 pub use vm_control_product::ServiceSendToGpu;
117 use vm_memory::GuestAddress;
118 
119 #[cfg(feature = "balloon")]
120 pub use crate::balloon_tube::BalloonControlCommand;
121 #[cfg(feature = "balloon")]
122 pub use crate::balloon_tube::BalloonTube;
123 #[cfg(feature = "gdb")]
124 pub use crate::gdb::VcpuDebug;
125 #[cfg(feature = "gdb")]
126 pub use crate::gdb::VcpuDebugStatus;
127 #[cfg(feature = "gdb")]
128 pub use crate::gdb::VcpuDebugStatusMessage;
129 #[cfg(feature = "gpu")]
130 use crate::gpu::GpuControlCommand;
131 #[cfg(feature = "gpu")]
132 use crate::gpu::GpuControlResult;
133 
/// Control the state of a particular VM CPU.
///
/// Each variant is one message that can be delivered to a vCPU.
#[derive(Clone, Debug)]
pub enum VcpuControl {
    /// Debugger request for the vCPU; only compiled in with the `gdb` feature.
    #[cfg(feature = "gdb")]
    Debug(VcpuDebug),
    /// Carries the [`VmRunMode`] for the vCPU.
    RunState(VmRunMode),
    // NOTE(review): presumably asks the vCPU thread to become real-time; confirm against the
    // handler, which is not visible here.
    MakeRT,
    /// Request the current state of the vCPU. The result is sent back over the included channel.
    GetStates(mpsc::Sender<VmRunMode>),
    /// Request the vcpu write a snapshot of itself to the writer, then send a `Result` back over
    /// the channel after completion/failure.
    Snapshot(SnapshotWriter, mpsc::Sender<anyhow::Result<()>>),
    /// Restore request; the payload is a [`VcpuRestoreRequest`].
    Restore(VcpuRestoreRequest),
    /// Only available on Android and Linux targets.
    // NOTE(review): the meaning and units of the `u32` are not visible here.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    Throttle(u32),
}
150 
/// Request to restore a Vcpu from a given snapshot, and report the results
/// back via the provided channel.
#[derive(Clone, Debug)]
pub struct VcpuRestoreRequest {
    /// Channel over which the outcome of the restore is reported.
    pub result_sender: mpsc::Sender<anyhow::Result<()>>,
    /// Reader for the snapshot the vCPU is restored from.
    pub snapshot_reader: SnapshotReader,
    /// Only present on x86_64.
    // NOTE(review): presumably a host TSC reading taken as the reference point for the
    // restore; confirm against the code that fills it in.
    #[cfg(target_arch = "x86_64")]
    pub host_tsc_reference_moment: u64,
}
160 
/// Mode of execution for the VM.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum VmRunMode {
    /// The default run mode indicating the VCPUs are running.
    #[default]
    Running,
    /// Indicates that the VCPUs are suspending execution until the `Running` mode is set.
    Suspending,
    /// Indicates that the VM is exiting all processes.
    Exiting,
    /// Indicates that the VM is stopped at a breakpoint, waiting for the debugger to continue.
    Breakpoint,
}

impl Display for VmRunMode {
    /// Writes the lowercase name of the run mode (for example `running`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Select the label first, then emit it with a single write.
        let label = match self {
            VmRunMode::Running => "running",
            VmRunMode::Suspending => "suspending",
            VmRunMode::Exiting => "exiting",
            VmRunMode::Breakpoint => "breakpoint",
        };
        f.write_str(label)
    }
}
187 
/// Trait for devices that get notification on specific GPE trigger.
///
/// `notify` has an empty default body, so an implementor that does not override it ignores the
/// notification.
pub trait GpeNotify: Send {
    /// Called to deliver the notification; does nothing by default.
    fn notify(&mut self) {}
}
192 
/// Trait for devices that get notification on specific PCI PME.
///
/// `notify` has an empty default body, so an implementor that does not override it ignores the
/// notification.
pub trait PmeNotify: Send {
    /// Called to deliver the notification; does nothing by default.
    // NOTE(review): `_requester_id` is presumably the PCI requester ID that raised the PME.
    fn notify(&mut self, _requester_id: u16) {}
}
197 
/// Hooks for power-management events.
///
/// Every method has an empty default body, so implementors only override the events they
/// care about.
// NOTE(review): the per-method meanings below are read from the method names; confirm against
// the implementors, which are not visible here.
pub trait PmResource {
    /// Power button event hook; no-op by default.
    fn pwrbtn_evt(&mut self) {}
    /// Sleep button event hook; no-op by default.
    fn slpbtn_evt(&mut self) {}
    /// RTC event hook; no-op by default.
    fn rtc_evt(&mut self, _clear_evt: Event) {}
    /// GPE event hook for GPE number `_gpe`; no-op by default.
    fn gpe_evt(&mut self, _gpe: u32, _clear_evt: Option<Event>) {}
    /// PME event hook; no-op by default.
    fn pme_evt(&mut self, _requester_id: u16) {}
    /// Hook for registering a [`GpeNotify`] device for `_gpe`; no-op by default.
    fn register_gpe_notify_dev(&mut self, _gpe: u32, _notify_dev: Arc<Mutex<dyn GpeNotify>>) {}
    /// Hook for registering a [`PmeNotify`] device for `_bus`; no-op by default.
    fn register_pme_notify_dev(&mut self, _bus: u8, _notify_dev: Arc<Mutex<dyn PmeNotify>>) {}
}
207 
/// The maximum number of devices that can be listed in one `UsbControlCommand`.
///
/// This value was set to be equal to `xhci_regs::MAX_PORTS` for convenience, but it is not
/// necessary for correctness. Importing that value directly would be overkill because it would
/// require adding a big dependency for a single const.
///
/// It is the length of the fixed-size arrays in `UsbControlCommand::ListDevice` and
/// `UsbControlResult::Devices`.
pub const USB_CONTROL_MAX_PORTS: usize = 16;
214 
/// Commands that can be issued for a disk.
#[derive(Serialize, Deserialize, Debug)]
pub enum DiskControlCommand {
    /// Resize a disk to `new_size` in bytes.
    Resize { new_size: u64 },
}
220 
221 impl Display for DiskControlCommand {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result222     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
223         use self::DiskControlCommand::*;
224 
225         match self {
226             Resize { new_size } => write!(f, "disk_resize {}", new_size),
227         }
228     }
229 }
230 
/// Outcome of a disk control operation (presumably the reply to a `DiskControlCommand`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum DiskControlResult {
    /// The operation succeeded.
    Ok,
    /// The operation failed with the given system error.
    Err(SysError),
}
236 
/// Net control commands for adding and removing tap devices.
///
/// Only compiled in with the `pci-hotplug` feature.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug)]
pub enum NetControlCommand {
    /// Add a tap device.
    // NOTE(review): the `String` is presumably the tap interface name; confirm with the handler.
    AddTap(String),
    /// Remove a tap device.
    // NOTE(review): the `u8` is presumably the PCI bus number reported by
    // `PciControlResult::AddOk`; confirm with the handler.
    RemoveTap(u8),
}
244 
/// Commands for attaching, detaching and listing USB devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum UsbControlCommand {
    /// Attach the device behind `file`.
    AttachDevice {
        /// Open file for the device; (de)serialized through `with_as_descriptor`.
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Attach the security key behind `file`.
    AttachSecurityKey {
        /// Open file for the key; (de)serialized through `with_as_descriptor`.
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Detach the device on `port`.
    DetachDevice {
        port: u8,
    },
    /// List devices; `ports` holds up to `USB_CONTROL_MAX_PORTS` port numbers.
    // NOTE(review): how unused trailing entries of `ports` are encoded is not visible here.
    ListDevice {
        ports: [u8; USB_CONTROL_MAX_PORTS],
    },
}
262 
/// Description of one attached USB device: its port and its vendor/product IDs.
///
/// The `Default` value has `port == 0`, which `valid()` reports as not valid.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)]
pub struct UsbControlAttachedDevice {
    /// Port number; `0` marks an entry that is not valid.
    pub port: u8,
    /// USB vendor ID.
    pub vendor_id: u16,
    /// USB product ID.
    pub product_id: u16,
}
269 
270 impl UsbControlAttachedDevice {
valid(self) -> bool271     pub fn valid(self) -> bool {
272         self.port != 0
273     }
274 }
275 
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
/// Result for hotplug and removal of PCI device.
///
/// Only compiled in with the `pci-hotplug` feature. Marked `#[must_use]`, so ignoring a
/// returned value produces a compiler warning.
pub enum PciControlResult {
    /// The device was added; `bus` is the reported bus number.
    AddOk { bus: u8 },
    /// The operation failed; the string carries the error description.
    ErrString(String),
    /// The device was removed.
    RemoveOk,
}
285 
#[cfg(feature = "pci-hotplug")]
impl Display for PciControlResult {
    /// Writes `add_ok <bus>`, `error: <message>` or `remove_ok`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            PciControlResult::RemoveOk => f.write_str("remove_ok"),
            PciControlResult::AddOk { bus } => write!(f, "add_ok {}", bus),
            PciControlResult::ErrString(message) => write!(f, "error: {}", message),
        }
    }
}
298 
/// Result of a USB control operation.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum UsbControlResult {
    /// Success; `port` is the port involved.
    Ok { port: u8 },
    NoAvailablePort,
    NoSuchDevice,
    NoSuchPort,
    FailedToOpenDevice,
    /// Fixed-size table of attached devices. Entries for which
    /// `UsbControlAttachedDevice::valid()` is false are skipped when the result is displayed.
    Devices([UsbControlAttachedDevice; USB_CONTROL_MAX_PORTS]),
    FailedToInitHostDevice,
}
309 
310 impl Display for UsbControlResult {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result311     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
312         use self::UsbControlResult::*;
313 
314         match self {
315             UsbControlResult::Ok { port } => write!(f, "ok {}", port),
316             NoAvailablePort => write!(f, "no_available_port"),
317             NoSuchDevice => write!(f, "no_such_device"),
318             NoSuchPort => write!(f, "no_such_port"),
319             FailedToOpenDevice => write!(f, "failed_to_open_device"),
320             Devices(devices) => {
321                 write!(f, "devices")?;
322                 for d in devices.iter().filter(|d| d.valid()) {
323                     write!(f, " {} {:04x} {:04x}", d.port, d.vendor_id, d.product_id)?;
324                 }
325                 std::result::Result::Ok(())
326             }
327             FailedToInitHostDevice => write!(f, "failed_to_init_host_device"),
328         }
329     }
330 }
331 
/// Commands for snapshot feature
#[derive(Serialize, Deserialize, Debug)]
pub enum SnapshotCommand {
    /// Take a snapshot.
    Take {
        /// Path for the snapshot.
        snapshot_path: PathBuf,
        // NOTE(review): presumably selects compression of the guest memory part of the
        // snapshot; confirm with the handler.
        compress_memory: bool,
        // NOTE(review): presumably selects encryption of the snapshot; confirm with the handler.
        encrypt: bool,
    },
}
341 
/// Commands for actions on devices and the devices control thread.
#[derive(Serialize, Deserialize, Debug)]
pub enum DeviceControlCommand {
    SleepDevices,
    WakeDevices,
    /// Snapshot the devices through `snapshot_writer`.
    SnapshotDevices {
        snapshot_writer: SnapshotWriter,
        compress_memory: bool,
    },
    /// Restore the devices from `snapshot_reader`.
    RestoreDevices {
        snapshot_reader: SnapshotReader,
    },
    GetDevicesState,
    // NOTE(review): presumably stops the devices control thread; confirm with the handler.
    Exit,
}
357 
/// Commands to control the IRQ handler thread.
#[derive(Serialize, Deserialize)]
pub enum IrqHandlerRequest {
    /// No response is sent for this command.
    AddIrqControlTubes(Vec<Tube>),
    /// Refreshes the set of event tokens (Events) from the Irqchip that the IRQ
    /// handler waits on to forward IRQs to their final destination (e.g. via
    /// Irqchip::service_irq_event).
    ///
    /// If the set of tokens exposed by the Irqchip changes while the VM is
    /// running (such as for snapshot restore), this command must be sent
    /// otherwise the VM will not receive IRQs as expected.
    RefreshIrqEventTokens,
    // NOTE(review): presumably answered with `IrqHandlerResponse::HandlerIterationComplete`,
    // whose documentation refers to this request; confirm with the handler.
    WakeAndNotifyIteration,
    /// No response is sent for this command.
    Exit,
}
375 
// NOTE(review): not referenced in the part of the file visible here; by its name, presumably
// the number of IRQ handler iterations a flush is expected to stay within. Confirm at the use
// site.
const EXPECTED_MAX_IRQ_FLUSH_ITERATIONS: usize = 100;
377 
/// Response for [IrqHandlerRequest].
///
/// Not every request produces a response; see the per-variant notes on [IrqHandlerRequest].
#[derive(Serialize, Deserialize, Debug)]
pub enum IrqHandlerResponse {
    /// Sent when the IRQ event tokens have been refreshed.
    IrqEventTokenRefreshComplete,
    /// Specifies the number of tokens serviced in the requested iteration
    /// (less the token for the `WakeAndNotifyIteration` request).
    HandlerIterationComplete(usize),
}
387 
/// Source of a `VmMemoryRequest::RegisterMemory` mapping.
#[derive(Serialize, Deserialize)]
pub enum VmMemorySource {
    /// Register shared memory represented by the given descriptor.
    /// On Windows, descriptor MUST be a mapping handle.
    SharedMemory(SharedMemory),
    /// Register a file mapping from the given descriptor.
    Descriptor {
        /// File descriptor to map.
        descriptor: SafeDescriptor,
        /// Offset within the file in bytes.
        offset: u64,
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register memory mapped by Vulkano.
    ///
    /// `map` imports this through `RutabagaGralloc::import_and_map`.
    Vulkan {
        /// Handle to import; ownership is handed to rutabaga when mapping.
        descriptor: SafeDescriptor,
        /// Passed through as `RutabagaHandle::handle_type`.
        handle_type: u32,
        /// Passed through as `VulkanInfo::memory_idx`.
        memory_idx: u32,
        /// Passed through as `DeviceId::device_uuid`.
        device_uuid: [u8; 16],
        /// Passed through as `DeviceId::driver_uuid`.
        driver_uuid: [u8; 16],
        /// Size passed to the import, and reported as the mapping size.
        size: u64,
    },
    /// Register the current rutabaga external mapping.
    ///
    /// `map` wraps `ptr` and `size` in a `base::ExternalMapping` as they are.
    ExternalMapping { ptr: u64, size: u64 },
}
415 
416 // The following are wrappers to avoid base dependencies in the rutabaga crate
to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor417 fn to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor {
418     // SAFETY:
419     // Safe because we own the SafeDescriptor at this point.
420     unsafe { RutabagaDescriptor::from_raw_descriptor(s.into_raw_descriptor()) }
421 }
422 
423 struct RutabagaMemoryRegion {
424     region: Box<dyn RutabagaMappedRegion>,
425 }
426 
427 impl RutabagaMemoryRegion {
new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion428     pub fn new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion {
429         RutabagaMemoryRegion { region }
430     }
431 }
432 
// SAFETY:
//
// Self guarantees `ptr`..`ptr+size` is an mmaped region owned by this object that
// can't be unmapped during the `MappedRegion`'s lifetime.
//
// Both methods simply forward to the wrapped rutabaga region.
unsafe impl MappedRegion for RutabagaMemoryRegion {
    /// Start address of the mapping, as reported by the wrapped region.
    fn as_ptr(&self) -> *mut u8 {
        self.region.as_ptr()
    }

    /// Size of the mapping, as reported by the wrapped region.
    fn size(&self) -> usize {
        self.region.size()
    }
}
446 
447 impl Display for VmMemorySource {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result448     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
449         use self::VmMemorySource::*;
450 
451         match self {
452             SharedMemory(..) => write!(f, "VmMemorySource::SharedMemory"),
453             Descriptor { .. } => write!(f, "VmMemorySource::Descriptor"),
454             Vulkan { .. } => write!(f, "VmMemorySource::Vulkan"),
455             ExternalMapping { .. } => write!(f, "VmMemorySource::ExternalMapping"),
456         }
457     }
458 }
459 
460 impl VmMemorySource {
461     /// Map the resource and return its mapping and size in bytes.
map( self, gralloc: &mut RutabagaGralloc, prot: Protection, ) -> Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)>462     pub fn map(
463         self,
464         gralloc: &mut RutabagaGralloc,
465         prot: Protection,
466     ) -> Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
467         let (mem_region, size, descriptor) = match self {
468             VmMemorySource::Descriptor {
469                 descriptor,
470                 offset,
471                 size,
472             } => (
473                 map_descriptor(&descriptor, offset, size, prot)?,
474                 size,
475                 Some(descriptor),
476             ),
477 
478             VmMemorySource::SharedMemory(shm) => {
479                 (map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
480             }
481             VmMemorySource::Vulkan {
482                 descriptor,
483                 handle_type,
484                 memory_idx,
485                 device_uuid,
486                 driver_uuid,
487                 size,
488             } => {
489                 let device_id = DeviceId {
490                     device_uuid,
491                     driver_uuid,
492                 };
493                 let mapped_region = match gralloc.import_and_map(
494                     RutabagaHandle {
495                         os_handle: to_rutabaga_desciptor(descriptor),
496                         handle_type,
497                     },
498                     VulkanInfo {
499                         memory_idx,
500                         device_id,
501                     },
502                     size,
503                 ) {
504                     Ok(mapped_region) => {
505                         let mapped_region: Box<dyn MappedRegion> =
506                             Box::new(RutabagaMemoryRegion::new(mapped_region));
507                         mapped_region
508                     }
509                     Err(e) => {
510                         error!("gralloc failed to import and map: {}", e);
511                         return Err(SysError::new(EINVAL));
512                     }
513                 };
514                 (mapped_region, size, None)
515             }
516             VmMemorySource::ExternalMapping { ptr, size } => {
517                 let mapped_region: Box<dyn MappedRegion> = Box::new(ExternalMapping {
518                     ptr,
519                     size: size as usize,
520                 });
521                 (mapped_region, size, None)
522             }
523         };
524         Ok((mem_region, size, descriptor))
525     }
526 }
527 
/// Destination of a `VmMemoryRequest::RegisterMemory` mapping in guest address space.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryDestination {
    /// Map at an offset within an existing PCI BAR allocation.
    ///
    /// `allocation` identifies the existing allocation and `offset` is the offset within it.
    ExistingAllocation { allocation: Alloc, offset: u64 },
    /// Map at the specified guest physical address.
    GuestPhysicalAddress(u64),
}
536 
537 impl VmMemoryDestination {
538     /// Allocate and return the guest address of a memory mapping destination.
allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress>539     pub fn allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress> {
540         let addr = match self {
541             VmMemoryDestination::ExistingAllocation { allocation, offset } => allocator
542                 .mmio_allocator_any()
543                 .address_from_pci_offset(allocation, offset, size)
544                 .map_err(|_e| SysError::new(EINVAL))?,
545             VmMemoryDestination::GuestPhysicalAddress(gpa) => gpa,
546         };
547         Ok(GuestAddress(addr))
548     }
549 }
550 
/// Request to register or unregister an ioevent.
#[derive(Serialize, Deserialize)]
pub struct IoEventUpdateRequest {
    /// Event associated with the ioevent.
    pub event: Event,
    /// Address of the ioevent.
    // NOTE(review): `VmMemoryRequest::IoEventRaw` describes this as a raw guest memory
    // address; confirm with the handler.
    pub addr: u64,
    /// Data match condition for the ioevent.
    pub datamatch: Datamatch,
    // NOTE(review): presumably `true` registers and `false` unregisters; confirm with the
    // handler, which is not visible here.
    pub register: bool,
}
559 
/// Request to mmap a file to a shared memory.
/// This request is supposed to follow a `VmMemoryRequest::MmapAndRegisterMemory` request that
/// contains `SharedMemory` that `file` is mmaped to.
///
/// Only available on Android and Linux targets.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[derive(Serialize, Deserialize)]
pub struct VmMemoryFileMapping {
    /// File to map; (de)serialized through `with_as_descriptor`.
    #[serde(with = "with_as_descriptor")]
    pub file: File,
    // NOTE(review): presumably the number of bytes to map; confirm with the handler.
    pub length: usize,
    // NOTE(review): presumably the offset within the shared memory at which the file is
    // mapped; confirm with the handler.
    pub mem_offset: usize,
    // NOTE(review): presumably the offset within `file` where the mapping starts; confirm
    // with the handler.
    pub file_offset: u64,
}
572 
/// Requests that change how memory is mapped into, or registered with, the guest.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryRequest {
    /// Prepare a shared memory region to make later operations more efficient. This
    /// may be a no-op depending on underlying platform support.
    PrepareSharedMemoryRegion { alloc: Alloc, cache: MemCacheType },
    /// Register a memory to be mapped to the guest.
    RegisterMemory {
        /// Source of the memory to register (mapped file descriptor, shared memory region, etc.)
        source: VmMemorySource,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Protection for the mapping. The region is added as read-only exactly when this
        /// equals `Protection::read()`.
        prot: Protection,
        /// Cache attribute for guest memory setting
        cache: MemCacheType,
    },
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Call mmap to `shm` and register the memory region as a read-only guest memory.
    /// This request is followed by an array of `VmMemoryFileMapping` with length
    /// `num_file_mappings`
    MmapAndRegisterMemory {
        /// Shared memory that is mapped and into which the files are then mapped.
        shm: SharedMemory,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Length of the array of `VmMemoryFileMapping` that follows.
        num_file_mappings: usize,
    },
    /// Call hypervisor to free the given memory range.
    DynamicallyFreeMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Call hypervisor to reclaim a priorly freed memory range.
    DynamicallyReclaimMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Balloon allocation/deallocation target reached.
    BalloonTargetReached { size: u64 },
    /// Unregister the given memory slot that was previously registered with `RegisterMemory`.
    UnregisterMemory(VmMemoryRegionId),
    /// Register an eventfd with raw guest memory address.
    IoEventRaw(IoEventUpdateRequest),
}
612 
/// Struct for managing `VmMemoryRequest`s IOMMU related state.
pub struct VmMemoryRequestIommuClient {
    /// Tube over which virtio-iommu requests are sent; locked for each request.
    tube: Arc<Mutex<Tube>>,
    /// Region ids for which a `VfioDmabufMap` request was answered successfully.
    registered_memory: BTreeSet<VmMemoryRegionId>,
}
618 
619 impl VmMemoryRequestIommuClient {
620     /// Constructs `VmMemoryRequestIommuClient` from a tube for communication with the viommu.
new(tube: Arc<Mutex<Tube>>) -> Self621     pub fn new(tube: Arc<Mutex<Tube>>) -> Self {
622         Self {
623             tube,
624             registered_memory: BTreeSet::new(),
625         }
626     }
627 }
628 
/// Bookkeeping entry for one registered memory region.
enum RegisteredMemory {
    /// A file mapping placed inside an already prepared region with `Vm::add_fd_mapping`.
    FixedMapping {
        /// Slot of the prepared region the mapping lives in.
        slot: MemSlot,
        /// Offset of the mapping within the prepared region.
        offset: usize,
        /// Size of the mapping.
        size: usize,
    },
    /// A region added on its own with `Vm::add_memory_region`.
    DynamicMapping {
        /// Slot returned when the region was added.
        slot: MemSlot,
    },
}
639 
/// A prepared memory region into which fixed mappings can later be placed.
pub struct VmMappedMemoryRegion {
    /// Guest address of the start of the region; fixed mappings are addressed relative to it.
    guest_address: GuestAddress,
    /// Memory slot of the region.
    slot: MemSlot,
}
644 
/// State tracked across `VmMemoryRequest`s.
#[derive(Default)]
pub struct VmMemoryRegionState {
    /// Regions prepared by `PrepareSharedMemoryRegion`, keyed by their allocation.
    mapped_regions: HashMap<Alloc, VmMappedMemoryRegion>,
    /// Registered regions, keyed by region id.
    registered_memory: BTreeMap<VmMemoryRegionId, RegisteredMemory>,
}
650 
try_map_to_prepared_region( vm: &mut impl Vm, region_state: &mut VmMemoryRegionState, source: &VmMemorySource, dest: &VmMemoryDestination, prot: &Protection, ) -> Option<VmMemoryResponse>651 fn try_map_to_prepared_region(
652     vm: &mut impl Vm,
653     region_state: &mut VmMemoryRegionState,
654     source: &VmMemorySource,
655     dest: &VmMemoryDestination,
656     prot: &Protection,
657 ) -> Option<VmMemoryResponse> {
658     let VmMemoryDestination::ExistingAllocation {
659         allocation,
660         offset: dest_offset,
661     } = dest
662     else {
663         return None;
664     };
665 
666     let VmMappedMemoryRegion {
667         guest_address,
668         slot,
669     } = region_state.mapped_regions.get(allocation)?;
670 
671     let (descriptor, file_offset, size) = match source {
672         VmMemorySource::Descriptor {
673             descriptor,
674             offset,
675             size,
676         } => (
677             Descriptor(descriptor.as_raw_descriptor()),
678             *offset,
679             *size as usize,
680         ),
681         VmMemorySource::SharedMemory(shm) => {
682             let size = shm.size() as usize;
683             (Descriptor(shm.as_raw_descriptor()), 0, size)
684         }
685         _ => {
686             error!(
687                 "source {} is not compatible with fixed mapping into prepared memory region",
688                 source
689             );
690             return Some(VmMemoryResponse::Err(SysError::new(EINVAL)));
691         }
692     };
693     if let Err(err) = vm.add_fd_mapping(
694         *slot,
695         *dest_offset as usize,
696         size,
697         &descriptor,
698         file_offset,
699         *prot,
700     ) {
701         return Some(VmMemoryResponse::Err(err));
702     }
703 
704     let guest_address = GuestAddress(guest_address.0 + dest_offset);
705     let region_id = VmMemoryRegionId(guest_address);
706     region_state.registered_memory.insert(
707         region_id,
708         RegisteredMemory::FixedMapping {
709             slot: *slot,
710             offset: *dest_offset as usize,
711             size,
712         },
713     );
714 
715     Some(VmMemoryResponse::RegisterMemory {
716         region_id,
717         slot: *slot,
718     })
719 }
720 
721 impl VmMemoryRequest {
722     /// Executes this request on the given Vm.
723     ///
724     /// # Arguments
725     /// * `vm` - The `Vm` to perform the request on.
726     /// * `allocator` - Used to allocate addresses.
727     ///
728     /// This does not return a result, instead encapsulating the success or failure in a
729     /// `VmMemoryResponse` with the intended purpose of sending the response back over the socket
730     /// that received this `VmMemoryResponse`.
execute( self, #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube, vm: &mut impl Vm, sys_allocator: &mut SystemAllocator, gralloc: &mut RutabagaGralloc, iommu_client: Option<&mut VmMemoryRequestIommuClient>, region_state: &mut VmMemoryRegionState, ) -> VmMemoryResponse731     pub fn execute(
732         self,
733         #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube,
734         vm: &mut impl Vm,
735         sys_allocator: &mut SystemAllocator,
736         gralloc: &mut RutabagaGralloc,
737         iommu_client: Option<&mut VmMemoryRequestIommuClient>,
738         region_state: &mut VmMemoryRegionState,
739     ) -> VmMemoryResponse {
740         use self::VmMemoryRequest::*;
741         match self {
742             PrepareSharedMemoryRegion { alloc, cache } => {
743                 // Currently the iommu_client is only used by virtio-gpu when used alongside GPU
744                 // pci-passthrough.
745                 //
746                 // TODO(b/323368701): Make compatible with iommu_client by ensuring that
747                 // VirtioIOMMUVfioCommand::VfioDmabufMap is submitted for both dynamic mappings and
748                 // fixed mappings (i.e. whether or not try_map_to_prepared_region succeeds in
749                 // RegisterMemory case below).
750                 assert!(iommu_client.is_none());
751 
752                 if !sys::should_prepare_memory_region() {
753                     return VmMemoryResponse::Ok;
754                 }
755 
756                 match sys::prepare_shared_memory_region(vm, sys_allocator, alloc, cache) {
757                     Ok(region) => {
758                         region_state.mapped_regions.insert(alloc, region);
759                         VmMemoryResponse::Ok
760                     }
761                     Err(e) => VmMemoryResponse::Err(e),
762                 }
763             }
764             RegisterMemory {
765                 source,
766                 dest,
767                 prot,
768                 cache,
769             } => {
770                 if let Some(resp) =
771                     try_map_to_prepared_region(vm, region_state, &source, &dest, &prot)
772                 {
773                     return resp;
774                 }
775 
776                 // Correct on Windows because callers of this IPC guarantee descriptor is a mapping
777                 // handle.
778                 let (mapped_region, size, descriptor) = match source.map(gralloc, prot) {
779                     Ok((region, size, descriptor)) => (region, size, descriptor),
780                     Err(e) => return VmMemoryResponse::Err(e),
781                 };
782 
783                 let guest_addr = match dest.allocate(sys_allocator, size) {
784                     Ok(addr) => addr,
785                     Err(e) => return VmMemoryResponse::Err(e),
786                 };
787 
788                 let slot = match vm.add_memory_region(
789                     guest_addr,
790                     mapped_region,
791                     prot == Protection::read(),
792                     false,
793                     cache,
794                 ) {
795                     Ok(slot) => slot,
796                     Err(e) => return VmMemoryResponse::Err(e),
797                 };
798 
799                 let region_id = VmMemoryRegionId(guest_addr);
800                 if let (Some(descriptor), Some(iommu_client)) = (descriptor, iommu_client) {
801                     let request =
802                         VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
803                             region_id,
804                             gpa: guest_addr.0,
805                             size,
806                             dma_buf: descriptor,
807                         });
808 
809                     match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
810                         Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
811                         resp => {
812                             error!("Unexpected message response: {:?}", resp);
813                             // Ignore the result because there is nothing we can do with a failure.
814                             let _ = vm.remove_memory_region(slot);
815                             return VmMemoryResponse::Err(SysError::new(EINVAL));
816                         }
817                     };
818 
819                     iommu_client.registered_memory.insert(region_id);
820                 }
821 
822                 region_state
823                     .registered_memory
824                     .insert(region_id, RegisteredMemory::DynamicMapping { slot });
825                 VmMemoryResponse::RegisterMemory { region_id, slot }
826             }
827             #[cfg(any(target_os = "android", target_os = "linux"))]
828             MmapAndRegisterMemory {
829                 shm,
830                 dest,
831                 num_file_mappings,
832             } => {
                // Define a callback to be executed with an extended limit on open file counts.
                // It receives `num_file_mappings` FDs and calls `add_fd_mapping` for each.
835                 let callback = || {
836                     let mem = match MemoryMappingBuilder::new(shm.size() as usize)
837                         .from_shared_memory(&shm)
838                         .build()
839                     {
840                         Ok(mem) => mem,
841                         Err(e) => {
842                             error!("Failed to build MemoryMapping from shared memory: {:#}", e);
843                             return Err(VmMemoryResponse::Err(SysError::new(EINVAL)));
844                         }
845                     };
846                     let mut mmap_arena = MemoryMappingArena::from(mem);
847 
848                     // If `num_file_mappings` exceeds `SCM_MAX_FD`, `file_mappings` are sent in
849                     // chunks of length `SCM_MAX_FD`.
850                     let mut file_mappings = Vec::with_capacity(num_file_mappings);
851                     let mut read = 0;
852                     while read < num_file_mappings {
853                         let len = std::cmp::min(num_file_mappings - read, base::unix::SCM_MAX_FD);
854                         let mps: Vec<VmMemoryFileMapping> = match tube.recv_with_max_fds(len) {
855                             Ok(m) => m,
856                             Err(e) => {
857                                 error!(
858                                     "Failed to get {num_file_mappings} FDs to be mapped: {:#}",
859                                     e
860                                 );
861                                 return Err(VmMemoryResponse::Err(SysError::new(EINVAL)));
862                             }
863                         };
864                         file_mappings.extend(mps.into_iter());
865                         read += len;
866                     }
867 
868                     for VmMemoryFileMapping {
869                         mem_offset,
870                         length,
871                         file,
872                         file_offset,
873                     } in file_mappings
874                     {
875                         if let Err(e) = mmap_arena.add_fd_mapping(
876                             mem_offset,
877                             length,
878                             &file,
879                             file_offset,
880                             Protection::read(),
881                         ) {
882                             error!("Failed to add fd mapping: {:#}", e);
883                             return Err(VmMemoryResponse::Err(SysError::new(EINVAL)));
884                         }
885                     }
886                     Ok(mmap_arena)
887                 };
888                 let mmap_arena = match call_with_extended_max_files(callback) {
889                     Ok(Ok(m)) => m,
890                     Ok(Err(e)) => {
891                         return e;
892                     }
893                     Err(e) => {
894                         error!("Failed to set max count of file descriptors: {e}");
895                         return VmMemoryResponse::Err(e);
896                     }
897                 };
898 
899                 let size = shm.size();
900                 let guest_addr = match dest.allocate(sys_allocator, size) {
901                     Ok(addr) => addr,
902                     Err(e) => return VmMemoryResponse::Err(e),
903                 };
904 
905                 let slot = match vm.add_memory_region(
906                     guest_addr,
907                     Box::new(mmap_arena),
908                     true,
909                     false,
910                     MemCacheType::CacheCoherent,
911                 ) {
912                     Ok(slot) => slot,
913                     Err(e) => return VmMemoryResponse::Err(e),
914                 };
915 
916                 let region_id = VmMemoryRegionId(guest_addr);
917 
918                 region_state
919                     .registered_memory
920                     .insert(region_id, RegisteredMemory::DynamicMapping { slot });
921 
922                 VmMemoryResponse::RegisterMemory { region_id, slot }
923             }
924             UnregisterMemory(id) => match region_state.registered_memory.remove(&id) {
925                 Some(RegisteredMemory::DynamicMapping { slot }) => match vm
926                     .remove_memory_region(slot)
927                 {
928                     Ok(_) => {
929                         if let Some(iommu_client) = iommu_client {
930                             if iommu_client.registered_memory.remove(&id) {
931                                 let request = VirtioIOMMURequest::VfioCommand(
932                                     VirtioIOMMUVfioCommand::VfioDmabufUnmap(id),
933                                 );
934 
935                                 match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
936                                     Ok(VirtioIOMMUResponse::VfioResponse(
937                                         VirtioIOMMUVfioResult::Ok,
938                                     )) => VmMemoryResponse::Ok,
939                                     resp => {
940                                         error!("Unexpected message response: {:?}", resp);
941                                         VmMemoryResponse::Err(SysError::new(EINVAL))
942                                     }
943                                 }
944                             } else {
945                                 VmMemoryResponse::Ok
946                             }
947                         } else {
948                             VmMemoryResponse::Ok
949                         }
950                     }
951                     Err(e) => VmMemoryResponse::Err(e),
952                 },
953                 Some(RegisteredMemory::FixedMapping { slot, offset, size }) => {
954                     match vm.remove_mapping(slot, offset, size) {
955                         Ok(()) => VmMemoryResponse::Ok,
956                         Err(e) => VmMemoryResponse::Err(e),
957                     }
958                 }
959                 None => VmMemoryResponse::Err(SysError::new(EINVAL)),
960             },
961             DynamicallyFreeMemoryRanges { ranges } => {
962                 let mut r = VmMemoryResponse::Ok;
963                 for (guest_address, size) in ranges {
964                     match vm.handle_balloon_event(BalloonEvent::Inflate(MemRegion {
965                         guest_address,
966                         size,
967                     })) {
968                         Ok(_) => {}
969                         Err(e) => {
970                             r = VmMemoryResponse::Err(e);
971                             break;
972                         }
973                     }
974                 }
975                 r
976             }
977             DynamicallyReclaimMemoryRanges { ranges } => {
978                 let mut r = VmMemoryResponse::Ok;
979                 for (guest_address, size) in ranges {
980                     match vm.handle_balloon_event(BalloonEvent::Deflate(MemRegion {
981                         guest_address,
982                         size,
983                     })) {
984                         Ok(_) => {}
985                         Err(e) => {
986                             r = VmMemoryResponse::Err(e);
987                             break;
988                         }
989                     }
990                 }
991                 r
992             }
993             BalloonTargetReached { size } => {
994                 match vm.handle_balloon_event(BalloonEvent::BalloonTargetReached(size)) {
995                     Ok(_) => VmMemoryResponse::Ok,
996                     Err(e) => VmMemoryResponse::Err(e),
997                 }
998             }
999             IoEventRaw(request) => {
1000                 let res = if request.register {
1001                     vm.register_ioevent(
1002                         &request.event,
1003                         IoEventAddress::Mmio(request.addr),
1004                         request.datamatch,
1005                     )
1006                 } else {
1007                     vm.unregister_ioevent(
1008                         &request.event,
1009                         IoEventAddress::Mmio(request.addr),
1010                         request.datamatch,
1011                     )
1012                 };
1013                 match res {
1014                     Ok(_) => VmMemoryResponse::Ok,
1015                     Err(e) => VmMemoryResponse::Err(e),
1016                 }
1017             }
1018         }
1019     }
1020 }
1021 
#[derive(Serialize, Deserialize, Debug, PartialOrd, PartialEq, Eq, Ord, Clone, Copy)]
/// Identifier for registered memory regions. Globally unique.
///
/// An id is handed back in `VmMemoryResponse::RegisterMemory` and is the key used to look a
/// region up again when it is unregistered.
// The current implementation uses guest physical address as the unique identifier.
pub struct VmMemoryRegionId(GuestAddress);
1026 
/// Response to a VM memory request.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmMemoryResponse {
    /// The request to register memory into guest address space was successful.
    ///
    /// `region_id` identifies the new region for a later unregister request and `slot` is the
    /// memory slot returned by the VM when the region was added.
    RegisterMemory {
        region_id: VmMemoryRegionId,
        slot: u32,
    },
    /// The request succeeded and there is no additional data to report.
    Ok,
    /// The request failed with the given system error.
    Err(SysError),
}
1037 
/// A request to allocate, route, or release an interrupt (GSI) for a device.
///
/// Requests are carried out by `VmIrqRequest::execute`, which answers with a `VmIrqResponse`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqRequest {
    /// Allocate one gsi, and associate gsi to irqfd with register_irqfd()
    ///
    /// On success the allocated gsi is reported in `VmIrqResponse::AllocateOneMsi`.
    AllocateOneMsi {
        irqfd: Event,
        device_id: u32,
        queue_id: usize,
        device_name: String,
    },
    /// Allocate a specific gsi to irqfd with register_irqfd(). This must only
    /// be used when it is known that the gsi is free. Only the snapshot
    /// subsystem can make this guarantee, and use of this request by any other
    /// caller is strongly discouraged.
    ///
    /// No allocation is performed for this request; `gsi` is used as given.
    AllocateOneMsiAtGsi {
        irqfd: Event,
        gsi: u32,
        device_id: u32,
        queue_id: usize,
        device_name: String,
    },
    /// Add one msi route entry into the IRQ chip.
    AddMsiRoute {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
    },
    /// Unregister `irqfd` from `gsi` and release the gsi back to the allocator.
    ///
    /// A failure to unregister is ignored; the gsi is released and `VmIrqResponse::Ok` is
    /// returned regardless.
    ReleaseOneIrq {
        gsi: u32,
        irqfd: Event,
    },
}
1070 
/// Data to set up an IRQ event or IRQ route on the IRQ chip.
/// VmIrqRequest::execute can't take an `IrqChip` argument, because of a dependency cycle between
/// devices and vm_control, so it takes a Fn that processes an `IrqSetup`.
pub enum IrqSetup<'a> {
    /// Register an event for a gsi. The fields are, in order: the gsi, the irqfd event, the
    /// device id, the queue id, and the device name.
    Event(u32, &'a Event, u32, usize, String),
    /// Install the given route.
    Route(IrqRoute),
    /// Unregister an event from a gsi. The fields are, in order: the gsi and the irqfd event.
    UnRegister(u32, &'a Event),
}
1079 
1080 impl VmIrqRequest {
1081     /// Executes this request on the given Vm.
1082     ///
1083     /// # Arguments
1084     /// * `set_up_irq` - A function that applies an `IrqSetup` to an IRQ chip.
1085     ///
1086     /// This does not return a result, instead encapsulating the success or failure in a
1087     /// `VmIrqResponse` with the intended purpose of sending the response back over the socket
1088     /// that received this `VmIrqResponse`.
execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse where F: FnOnce(IrqSetup) -> Result<()>,1089     pub fn execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse
1090     where
1091         F: FnOnce(IrqSetup) -> Result<()>,
1092     {
1093         use self::VmIrqRequest::*;
1094         match *self {
1095             AllocateOneMsi {
1096                 ref irqfd,
1097                 device_id,
1098                 queue_id,
1099                 ref device_name,
1100             } => {
1101                 if let Some(irq_num) = sys_allocator.allocate_irq() {
1102                     match set_up_irq(IrqSetup::Event(
1103                         irq_num,
1104                         irqfd,
1105                         device_id,
1106                         queue_id,
1107                         device_name.clone(),
1108                     )) {
1109                         Ok(_) => VmIrqResponse::AllocateOneMsi { gsi: irq_num },
1110                         Err(e) => VmIrqResponse::Err(e),
1111                     }
1112                 } else {
1113                     VmIrqResponse::Err(SysError::new(EINVAL))
1114                 }
1115             }
1116             AllocateOneMsiAtGsi {
1117                 ref irqfd,
1118                 gsi,
1119                 device_id,
1120                 queue_id,
1121                 ref device_name,
1122             } => {
1123                 match set_up_irq(IrqSetup::Event(
1124                     gsi,
1125                     irqfd,
1126                     device_id,
1127                     queue_id,
1128                     device_name.clone(),
1129                 )) {
1130                     Ok(_) => VmIrqResponse::Ok,
1131                     Err(e) => VmIrqResponse::Err(e),
1132                 }
1133             }
1134             AddMsiRoute {
1135                 gsi,
1136                 msi_address,
1137                 msi_data,
1138             } => {
1139                 let route = IrqRoute {
1140                     gsi,
1141                     source: IrqSource::Msi {
1142                         address: msi_address,
1143                         data: msi_data,
1144                     },
1145                 };
1146                 match set_up_irq(IrqSetup::Route(route)) {
1147                     Ok(_) => VmIrqResponse::Ok,
1148                     Err(e) => VmIrqResponse::Err(e),
1149                 }
1150             }
1151             ReleaseOneIrq { gsi, ref irqfd } => {
1152                 let _ = set_up_irq(IrqSetup::UnRegister(gsi, irqfd));
1153                 sys_allocator.release_irq(gsi);
1154                 VmIrqResponse::Ok
1155             }
1156         }
1157     }
1158 }
1159 
/// Response to a `VmIrqRequest`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqResponse {
    /// A gsi was allocated and set up for `VmIrqRequest::AllocateOneMsi`.
    AllocateOneMsi { gsi: u32 },
    /// The request succeeded and there is no additional data to report.
    Ok,
    /// The request failed with the given system error.
    Err(SysError),
}
1166 
/// State that devices can be asked to enter.
// NOTE(review): presumably `Sleep` suspends devices and `Wake` resumes them; the handling code
// is not visible here - confirm against the consumer of this enum.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum DevicesState {
    Sleep,
    Wake,
}
1172 
/// Outcome of a battery control operation.
///
/// Also serves as the error type when parsing battery-related strings (`BatteryType`,
/// `BatProperty`, `BatStatus`, `BatHealth`, and `BatControlCommand::new`).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum BatControlResult {
    /// The property was set successfully.
    Ok,
    /// No battery device has been created.
    NoBatDevice,
    /// The health string is not one of the supported values.
    NoSuchHealth,
    /// The property name is not one of the supported values.
    NoSuchProperty,
    /// The status string is not one of the supported values.
    NoSuchStatus,
    /// The battery type string is not one of the supported values.
    NoSuchBatType,
    /// The target value could not be parsed as an integer.
    StringParseIntErr,
    /// The target value could not be parsed as a boolean.
    StringParseBoolErr,
}
1184 
1185 impl Display for BatControlResult {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1186     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1187         use self::BatControlResult::*;
1188 
1189         match self {
1190             Ok => write!(f, "Setting battery property successfully"),
1191             NoBatDevice => write!(f, "No battery device created"),
1192             NoSuchHealth => write!(f, "Invalid Battery health setting. Only support: unknown/good/overheat/dead/overvoltage/unexpectedfailure/cold/watchdogtimerexpire/safetytimerexpire/overcurrent"),
1193             NoSuchProperty => write!(f, "Battery doesn't have such property. Only support: status/health/present/capacity/aconline"),
1194             NoSuchStatus => write!(f, "Invalid Battery status setting. Only support: unknown/charging/discharging/notcharging/full"),
1195             NoSuchBatType => write!(f, "Invalid Battery type setting. Only support: goldfish"),
1196             StringParseIntErr => write!(f, "Battery property target ParseInt error"),
1197             StringParseBoolErr => write!(f, "Battery property target ParseBool error"),
1198         }
1199     }
1200 }
1201 
/// Kind of emulated battery device.
///
/// Serialized with kebab-case variant names. `Goldfish` is the default and currently the only
/// variant.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum BatteryType {
    #[default]
    Goldfish,
}
1208 
1209 impl FromStr for BatteryType {
1210     type Err = BatControlResult;
1211 
from_str(s: &str) -> StdResult<Self, Self::Err>1212     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1213         match s {
1214             "goldfish" => Ok(BatteryType::Goldfish),
1215             _ => Err(BatControlResult::NoSuchBatType),
1216         }
1217     }
1218 }
1219 
/// Name of a battery property or action that can be selected on the command line.
///
/// The textual form of each variant is defined by the `FromStr` and `Display` implementations.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatProperty {
    /// `"status"`
    Status,
    /// `"health"`
    Health,
    /// `"present"`
    Present,
    /// `"capacity"`
    Capacity,
    /// `"aconline"`
    ACOnline,
    /// `"set_fake_bat_config"`
    SetFakeBatConfig,
    /// `"cancel_fake_bat_config"`
    CancelFakeBatConfig,
}
1230 
1231 impl FromStr for BatProperty {
1232     type Err = BatControlResult;
1233 
from_str(s: &str) -> StdResult<Self, Self::Err>1234     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1235         match s {
1236             "status" => Ok(BatProperty::Status),
1237             "health" => Ok(BatProperty::Health),
1238             "present" => Ok(BatProperty::Present),
1239             "capacity" => Ok(BatProperty::Capacity),
1240             "aconline" => Ok(BatProperty::ACOnline),
1241             "set_fake_bat_config" => Ok(BatProperty::SetFakeBatConfig),
1242             "cancel_fake_bat_config" => Ok(BatProperty::CancelFakeBatConfig),
1243             _ => Err(BatControlResult::NoSuchProperty),
1244         }
1245     }
1246 }
1247 
1248 impl Display for BatProperty {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1249     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1250         match *self {
1251             BatProperty::Status => write!(f, "status"),
1252             BatProperty::Health => write!(f, "health"),
1253             BatProperty::Present => write!(f, "present"),
1254             BatProperty::Capacity => write!(f, "capacity"),
1255             BatProperty::ACOnline => write!(f, "aconline"),
1256             BatProperty::SetFakeBatConfig => write!(f, "set_fake_bat_config"),
1257             BatProperty::CancelFakeBatConfig => write!(f, "cancel_fake_bat_config"),
1258         }
1259     }
1260 }
1261 
/// Charging status of the battery.
///
/// The variant order matters: `From<BatStatus> for u32` converts a variant to its position in
/// this declaration (`Unknown` is 0, `Full` is 4), so variants must not be reordered.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatStatus {
    /// `"unknown"`
    Unknown,
    /// `"charging"`
    Charging,
    /// `"discharging"`
    DisCharging,
    /// `"notcharging"`
    NotCharging,
    /// `"full"`
    Full,
}
1270 
1271 impl BatStatus {
new(status: String) -> std::result::Result<Self, BatControlResult>1272     pub fn new(status: String) -> std::result::Result<Self, BatControlResult> {
1273         match status.as_str() {
1274             "unknown" => Ok(BatStatus::Unknown),
1275             "charging" => Ok(BatStatus::Charging),
1276             "discharging" => Ok(BatStatus::DisCharging),
1277             "notcharging" => Ok(BatStatus::NotCharging),
1278             "full" => Ok(BatStatus::Full),
1279             _ => Err(BatControlResult::NoSuchStatus),
1280         }
1281     }
1282 }
1283 
1284 impl FromStr for BatStatus {
1285     type Err = BatControlResult;
1286 
from_str(s: &str) -> StdResult<Self, Self::Err>1287     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1288         match s {
1289             "unknown" => Ok(BatStatus::Unknown),
1290             "charging" => Ok(BatStatus::Charging),
1291             "discharging" => Ok(BatStatus::DisCharging),
1292             "notcharging" => Ok(BatStatus::NotCharging),
1293             "full" => Ok(BatStatus::Full),
1294             _ => Err(BatControlResult::NoSuchStatus),
1295         }
1296     }
1297 }
1298 
1299 impl From<BatStatus> for u32 {
from(status: BatStatus) -> Self1300     fn from(status: BatStatus) -> Self {
1301         status as u32
1302     }
1303 }
1304 
/// Health state of the battery.
///
/// The variant order matters: `From<BatHealth> for u32` converts a variant to its position in
/// this declaration (`Unknown` is 0, `OverCurrent` is 9), so variants must not be reordered.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatHealth {
    /// `"unknown"`
    Unknown,
    /// `"good"`
    Good,
    /// `"overheat"`
    Overheat,
    /// `"dead"`
    Dead,
    /// `"overvoltage"`
    OverVoltage,
    /// `"unexpectedfailure"`
    UnexpectedFailure,
    /// `"cold"`
    Cold,
    /// `"watchdogtimerexpire"`
    WatchdogTimerExpire,
    /// `"safetytimerexpire"`
    SafetyTimerExpire,
    /// `"overcurrent"`
    OverCurrent,
}
1318 
1319 impl FromStr for BatHealth {
1320     type Err = BatControlResult;
1321 
from_str(s: &str) -> StdResult<Self, Self::Err>1322     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1323         match s {
1324             "unknown" => Ok(BatHealth::Unknown),
1325             "good" => Ok(BatHealth::Good),
1326             "overheat" => Ok(BatHealth::Overheat),
1327             "dead" => Ok(BatHealth::Dead),
1328             "overvoltage" => Ok(BatHealth::OverVoltage),
1329             "unexpectedfailure" => Ok(BatHealth::UnexpectedFailure),
1330             "cold" => Ok(BatHealth::Cold),
1331             "watchdogtimerexpire" => Ok(BatHealth::WatchdogTimerExpire),
1332             "safetytimerexpire" => Ok(BatHealth::SafetyTimerExpire),
1333             "overcurrent" => Ok(BatHealth::OverCurrent),
1334             _ => Err(BatControlResult::NoSuchHealth),
1335         }
1336     }
1337 }
1338 
1339 impl From<BatHealth> for u32 {
from(status: BatHealth) -> Self1340     fn from(status: BatHealth) -> Self {
1341         status as u32
1342     }
1343 }
1344 
/// A fully parsed battery control command, as produced by `BatControlCommand::new`.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatControlCommand {
    /// Set the charging status.
    SetStatus(BatStatus),
    /// Set the health state.
    SetHealth(BatHealth),
    /// Set the "present" property to the given integer value.
    SetPresent(u32),
    /// Set the "capacity" property to the given integer value.
    SetCapacity(u32),
    /// Set the "aconline" property to the given integer value.
    SetACOnline(u32),
    /// Apply a fake battery configuration with the given integer value.
    // NOTE(review): the meaning of the value is not visible here - confirm with the battery
    // device implementation.
    SetFakeBatConfig(u32),
    /// Cancel a previously applied fake battery configuration.
    CancelFakeConfig,
}
1355 
1356 impl BatControlCommand {
new(property: String, target: String) -> std::result::Result<Self, BatControlResult>1357     pub fn new(property: String, target: String) -> std::result::Result<Self, BatControlResult> {
1358         let cmd = property.parse::<BatProperty>()?;
1359         match cmd {
1360             BatProperty::Status => Ok(BatControlCommand::SetStatus(target.parse::<BatStatus>()?)),
1361             BatProperty::Health => Ok(BatControlCommand::SetHealth(target.parse::<BatHealth>()?)),
1362             BatProperty::Present => Ok(BatControlCommand::SetPresent(
1363                 target
1364                     .parse::<u32>()
1365                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1366             )),
1367             BatProperty::Capacity => Ok(BatControlCommand::SetCapacity(
1368                 target
1369                     .parse::<u32>()
1370                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1371             )),
1372             BatProperty::ACOnline => Ok(BatControlCommand::SetACOnline(
1373                 target
1374                     .parse::<u32>()
1375                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1376             )),
1377             BatProperty::SetFakeBatConfig => Ok(BatControlCommand::SetFakeBatConfig(
1378                 target
1379                     .parse::<u32>()
1380                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1381             )),
1382             BatProperty::CancelFakeBatConfig => Ok(BatControlCommand::CancelFakeConfig),
1383         }
1384     }
1385 }
1386 
/// Used for VM to control battery properties.
pub struct BatControl {
    /// The kind of battery device being controlled.
    pub type_: BatteryType,
    /// Tube over which battery control messages are exchanged.
    // NOTE(review): presumably connected to the battery device - confirm at the construction
    // site.
    pub control_tube: Tube,
}
1392 
/// Commands used by the VM to control virtio-snd devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum SndControlCommand {
    /// Mute or unmute all sound devices.
    // NOTE(review): presumably `true` mutes and `false` unmutes - confirm with the handler.
    MuteAll(bool),
}
1398 
/// Used to mark a hotplugged PCI device's device type.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum HotPlugDeviceType {
    UpstreamPort,
    DownstreamPort,
    EndPoint,
}
1406 
/// Used for VM to hotplug PCI devices.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HotPlugDeviceInfo {
    /// What kind of PCI device is being hotplugged.
    pub device_type: HotPlugDeviceType,
    /// Path identifying the device.
    // NOTE(review): presumably the host sysfs path of the device - confirm against callers.
    pub path: PathBuf,
    /// Hotplug-interrupt flag.
    // NOTE(review): presumably selects whether a hotplug interrupt is raised to the guest -
    // confirm against the hotplug handler.
    pub hp_interrupt: bool,
}
1414 
/// Message for communicating a suspend or resume to the virtio-pvclock device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum PvClockCommand {
    /// Notify the device of a suspend.
    Suspend,
    /// Notify the device of a resume.
    Resume,
}
1421 
/// Message used by virtio-pvclock to communicate command results.
#[derive(Serialize, Deserialize, Debug)]
pub enum PvClockCommandResponse {
    /// The command succeeded and there is no additional data to report.
    Ok,
    /// Reports a tick count together with a resume.
    // NOTE(review): presumably the reply to `PvClockCommand::Resume`, carrying the accumulated
    // suspended time in ticks - confirm with the pvclock device.
    Resumed { total_suspended_ticks: u64 },
    /// The device is not active.
    DeviceInactive,
    /// The command failed with the given system error.
    Err(SysError),
}
1430 
/// Commands for vmm-swap feature
// NOTE(review): the per-variant behaviour is implemented outside this section; the variant names
// are the only information available here.
#[derive(Serialize, Deserialize, Debug)]
pub enum SwapCommand {
    Enable,
    Trim,
    SwapOut,
    /// Disable vmm-swap. `slow_file_cleanup` is passed through to the swap implementation.
    Disable { slow_file_cleanup: bool },
    Status,
}
1440 
/// A request to the main process to perform some operation on the VM.
///
/// Unless otherwise noted, each request should expect a `VmResponse::Ok` to be received on success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmRequest {
    /// Break the VM's run loop and exit.
    Exit,
    /// Trigger a power button event in the guest.
    Powerbtn,
    /// Trigger a sleep button event in the guest.
    Sleepbtn,
    /// Trigger a RTC interrupt in the guest. When the irq associated with the RTC is
    /// resampled, it will be re-asserted as long as `clear_evt` is not signaled.
    Rtc { clear_evt: Event },
    /// Suspend the VM's VCPUs until resume.
    SuspendVcpus,
    /// Swap the memory content into files on a disk
    Swap(SwapCommand),
    /// Resume the VM's VCPUs that were previously suspended.
    ResumeVcpus,
    /// Inject a general-purpose event. If `clear_evt` is provided, when the irq associated
    /// with the GPE is resampled, it will be re-asserted as long as `clear_evt` is not
    /// signaled.
    Gpe { gpe: u32, clear_evt: Option<Event> },
    /// Inject a PCI PME
    PciPme(u16),
    /// Make the VM's RT VCPU real-time.
    MakeRT,
    /// Command for balloon driver.
    #[cfg(feature = "balloon")]
    BalloonCommand(BalloonControlCommand),
    /// Send a command to a disk chosen by `disk_index`.
    /// `disk_index` is a 0-based count of `--disk`, `--rwdisk`, and `-r` command-line options.
    DiskCommand {
        disk_index: usize,
        command: DiskControlCommand,
    },
    /// Command to use the USB controller.
    UsbCommand(UsbControlCommand),
    /// Command to modify the gpu.
    #[cfg(feature = "gpu")]
    GpuCommand(GpuControlCommand),
    /// Command to set battery.
    BatCommand(BatteryType, BatControlCommand),
    /// Command to control snd devices
    #[cfg(feature = "audio")]
    SndCommand(SndControlCommand),
    /// Command to add/remove multiple vfio-pci devices
    ///
    /// `add` selects between adding (`true`) and removing (`false`) `device`.
    HotPlugVfioCommand {
        device: HotPlugDeviceInfo,
        add: bool,
    },
    /// Command to add/remove network tap device as virtio-pci device
    #[cfg(feature = "pci-hotplug")]
    HotPlugNetCommand(NetControlCommand),
    /// Command to Snapshot devices
    Snapshot(SnapshotCommand),
    /// Register for event notification
    RegisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for notifications for event
    UnregisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for all event notification
    Unregister { socket_addr: String },
    /// Suspend VM VCPUs and Devices until resume.
    SuspendVm,
    /// Resume VM VCPUs and Devices.
    ResumeVm,
    /// Returns Vcpus PID/TID
    VcpuPidTid,
    /// Throttles the requested vCPU for microseconds
    // NOTE(review): presumably the fields are (vCPU index, duration in microseconds) - confirm
    // with the handler.
    Throttle(usize, u32),
    /// Returns unique descriptor of this VM.
    GetVmDescriptor,
}
1522 
/// Kinds of events a listener can register for (see `VmRequest::RegisterListener`).
///
/// This is the payload-free counterpart of `RegisteredEventWithData`.
///
/// NOTE: when making any changes to this enum please also update
/// RegisteredEventFfi in crosvm_control/src/lib.rs
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum RegisteredEvent {
    VirtioBalloonWsReport,
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1531 
/// A registered event together with the data it carries.
///
/// Each variant corresponds to the `RegisteredEvent` variant of the same name; see
/// `RegisteredEventWithData::into_event`.
#[derive(Serialize, Deserialize, Debug)]
pub enum RegisteredEventWithData {
    /// A balloon working-set report.
    VirtioBalloonWsReport {
        /// The working-set buckets of the report.
        ws_buckets: Vec<balloon_control::WSBucket>,
        /// The balloon's actual value that accompanies the report.
        // NOTE(review): presumably the current balloon size in bytes - confirm with the producer.
        balloon_actual: u64,
    },
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1541 
1542 impl RegisteredEventWithData {
into_event(&self) -> RegisteredEvent1543     pub fn into_event(&self) -> RegisteredEvent {
1544         match self {
1545             Self::VirtioBalloonWsReport { .. } => RegisteredEvent::VirtioBalloonWsReport,
1546             Self::VirtioBalloonResize => RegisteredEvent::VirtioBalloonResize,
1547             Self::VirtioBalloonOOMDeflation => RegisteredEvent::VirtioBalloonOOMDeflation,
1548         }
1549     }
1550 
1551     #[cfg(feature = "registered_events")]
into_proto(&self) -> registered_events::RegisteredEvent1552     pub fn into_proto(&self) -> registered_events::RegisteredEvent {
1553         match self {
1554             Self::VirtioBalloonWsReport {
1555                 ws_buckets,
1556                 balloon_actual,
1557             } => {
1558                 let mut report = registered_events::VirtioBalloonWsReport {
1559                     balloon_actual: *balloon_actual,
1560                     ..registered_events::VirtioBalloonWsReport::new()
1561                 };
1562                 for ws in ws_buckets {
1563                     report.ws_buckets.push(registered_events::VirtioWsBucket {
1564                         age: ws.age,
1565                         file_bytes: ws.bytes[0],
1566                         anon_bytes: ws.bytes[1],
1567                         ..registered_events::VirtioWsBucket::new()
1568                     });
1569                 }
1570                 let mut event = registered_events::RegisteredEvent::new();
1571                 event.set_ws_report(report);
1572                 event
1573             }
1574             Self::VirtioBalloonResize => {
1575                 let mut event = registered_events::RegisteredEvent::new();
1576                 event.set_resize(registered_events::VirtioBalloonResize::new());
1577                 event
1578             }
1579             Self::VirtioBalloonOOMDeflation => {
1580                 let mut event = registered_events::RegisteredEvent::new();
1581                 event.set_oom_deflation(registered_events::VirtioBalloonOOMDeflation::new());
1582                 event
1583             }
1584         }
1585     }
1586 
from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self1587     pub fn from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self {
1588         RegisteredEventWithData::VirtioBalloonWsReport {
1589             ws_buckets: ws.ws.clone(),
1590             balloon_actual,
1591         }
1592     }
1593 }
1594 
handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse1595 pub fn handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse {
1596     // Forward the request to the block device process via its control socket.
1597     if let Err(e) = disk_host_tube.send(command) {
1598         error!("disk socket send failed: {}", e);
1599         return VmResponse::Err(SysError::new(EINVAL));
1600     }
1601 
1602     // Wait for the disk control command to be processed
1603     match disk_host_tube.recv() {
1604         Ok(DiskControlResult::Ok) => VmResponse::Ok,
1605         Ok(DiskControlResult::Err(e)) => VmResponse::Err(e),
1606         Err(e) => {
1607             error!("disk socket recv failed: {}", e);
1608             VmResponse::Err(SysError::new(EINVAL))
1609         }
1610     }
1611 }
1612 
1613 /// WARNING: descriptor must be a mapping handle on Windows.
map_descriptor( descriptor: &dyn AsRawDescriptor, offset: u64, size: u64, prot: Protection, ) -> Result<Box<dyn MappedRegion>>1614 fn map_descriptor(
1615     descriptor: &dyn AsRawDescriptor,
1616     offset: u64,
1617     size: u64,
1618     prot: Protection,
1619 ) -> Result<Box<dyn MappedRegion>> {
1620     let size: usize = size.try_into().map_err(|_e| SysError::new(ERANGE))?;
1621     match MemoryMappingBuilder::new(size)
1622         .from_descriptor(descriptor)
1623         .offset(offset)
1624         .protection(prot)
1625         .build()
1626     {
1627         Ok(mmap) => Ok(Box::new(mmap)),
1628         Err(MmapError::SystemCallFailed(e)) => Err(e),
1629         _ => Err(SysError::new(EINVAL)),
1630     }
1631 }
1632 
1633 // Get vCPU state. vCPUs are expected to all hold the same state.
1634 // In this function, there may be a time where vCPUs are not holding the same state
1635 // as they transition from one state to the other. This is expected, and the final result
1636 // should be all vCPUs holding the same state.
get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode>1637 fn get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode> {
1638     let (send_chan, recv_chan) = mpsc::channel();
1639     kick_vcpus(VcpuControl::GetStates(send_chan));
1640     if vcpu_num == 0 {
1641         bail!("vcpu_num is zero");
1642     }
1643     let mut current_mode_vec: Vec<VmRunMode> = Vec::new();
1644     for _ in 0..vcpu_num {
1645         match recv_chan.recv() {
1646             Ok(state) => current_mode_vec.push(state),
1647             Err(e) => {
1648                 bail!("Failed to get vCPU state: {}", e);
1649             }
1650         };
1651     }
1652     let first_state = current_mode_vec[0];
1653     if first_state == VmRunMode::Exiting {
1654         panic!("Attempt to snapshot while exiting.");
1655     }
1656     if current_mode_vec.iter().any(|x| *x != first_state) {
1657         // We do not panic here. It could be that vCPUs are transitioning from one mode to another.
1658         bail!("Unknown VM state: vCPUs hold different states.");
1659     }
1660     Ok(first_state)
1661 }
1662 
/// A guard to guarantee that all the vCPUs are suspended during the scope.
///
/// When this guard is dropped, it rolls back the state of CPUs.
pub struct VcpuSuspendGuard<'a> {
    // Run mode the vCPUs reported when the guard was created; sent back to them on drop unless
    // it was already `Suspending`.
    saved_run_mode: VmRunMode,
    // Callback used to send a `VcpuControl` message to the vCPUs.
    kick_vcpus: &'a dyn Fn(VcpuControl),
}
1670 
1671 impl<'a> VcpuSuspendGuard<'a> {
1672     /// Check the all vCPU state and suspend the vCPUs if they are running.
1673     ///
1674     /// This returns [VcpuSuspendGuard] to rollback the vcpu state.
1675     ///
1676     /// # Arguments
1677     ///
1678     /// * `kick_vcpus` - A funtion to send [VcpuControl] message to all the vCPUs and interrupt
1679     ///   them.
1680     /// * `vcpu_num` - The number of vCPUs.
new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self>1681     pub fn new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self> {
1682         // get initial vcpu state
1683         let saved_run_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1684         match saved_run_mode {
1685             VmRunMode::Running => {
1686                 kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1687                 // Blocking call, waiting for response to ensure vCPU state was updated.
1688                 // In case of failure, where a vCPU still has the state running, start up vcpus and
1689                 // abort operation.
1690                 let current_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1691                 if current_mode != VmRunMode::Suspending {
1692                     kick_vcpus(VcpuControl::RunState(saved_run_mode));
1693                     bail!("vCPUs failed to all suspend. Kicking back all vCPUs to their previous state: {saved_run_mode}");
1694                 }
1695             }
1696             VmRunMode::Suspending => {
1697                 // do nothing. keep the state suspending.
1698             }
1699             other => {
1700                 bail!("vcpus are not in running/suspending state, but {}", other);
1701             }
1702         };
1703         Ok(Self {
1704             saved_run_mode,
1705             kick_vcpus,
1706         })
1707     }
1708 }
1709 
1710 impl Drop for VcpuSuspendGuard<'_> {
drop(&mut self)1711     fn drop(&mut self) {
1712         if self.saved_run_mode != VmRunMode::Suspending {
1713             (self.kick_vcpus)(VcpuControl::RunState(self.saved_run_mode));
1714         }
1715     }
1716 }
1717 
/// A guard to guarantee that all devices are sleeping during its scope.
///
/// When this guard is dropped, it wakes the devices.
pub struct DeviceSleepGuard<'a> {
    // Tube over which `DeviceControlCommand`s are sent and their `VmResponse`s are received.
    device_control_tube: &'a Tube,
    // Devices state reported when the guard was created; the devices are only woken on drop if
    // this was `DevicesState::Wake`.
    devices_state: DevicesState,
}
1725 
1726 impl<'a> DeviceSleepGuard<'a> {
new(device_control_tube: &'a Tube) -> anyhow::Result<Self>1727     fn new(device_control_tube: &'a Tube) -> anyhow::Result<Self> {
1728         device_control_tube
1729             .send(&DeviceControlCommand::GetDevicesState)
1730             .context("send command to devices control socket")?;
1731         let devices_state = match device_control_tube
1732             .recv()
1733             .context("receive from devices control socket")?
1734         {
1735             VmResponse::DevicesState(state) => state,
1736             resp => bail!("failed to get devices state. Unexpected behavior: {}", resp),
1737         };
1738         if let DevicesState::Wake = devices_state {
1739             device_control_tube
1740                 .send(&DeviceControlCommand::SleepDevices)
1741                 .context("send command to devices control socket")?;
1742             match device_control_tube
1743                 .recv()
1744                 .context("receive from devices control socket")?
1745             {
1746                 VmResponse::Ok => (),
1747                 resp => bail!("device sleep failed: {}", resp),
1748             }
1749         }
1750         Ok(Self {
1751             device_control_tube,
1752             devices_state,
1753         })
1754     }
1755 }
1756 
1757 impl Drop for DeviceSleepGuard<'_> {
drop(&mut self)1758     fn drop(&mut self) {
1759         if let DevicesState::Wake = self.devices_state {
1760             if let Err(e) = self
1761                 .device_control_tube
1762                 .send(&DeviceControlCommand::WakeDevices)
1763             {
1764                 panic!("failed to request device wake after snapshot: {}", e);
1765             }
1766             match self.device_control_tube.recv() {
1767                 Ok(VmResponse::Ok) => (),
1768                 Ok(resp) => panic!("unexpected response to device wake request: {}", resp),
1769                 Err(e) => panic!("failed to get reply for device wake request: {}", e),
1770             }
1771         }
1772     }
1773 }
1774 
1775 impl VmRequest {
1776     /// Executes this request on the given Vm and other mutable state.
1777     ///
1778     /// This does not return a result, instead encapsulating the success or failure in a
1779     /// `VmResponse` with the intended purpose of sending the response back over the  socket that
1780     /// received this `VmRequest`.
1781     ///
1782     /// `suspended_pvclock_state`: If the hypervisor has its own pvclock (not the same as
1783     /// virtio-pvclock) and the VM is suspended (not just the vCPUs, but the full VM), then
1784     /// `suspended_pvclock_state` will be used to store the ClockState saved just after the vCPUs
1785     /// were suspended. It is important that we save the value right after the vCPUs are suspended
1786     /// and restore it right before the vCPUs are resumed (instead of, more naturally, during the
1787     /// snapshot/restore steps) because the pvclock continues to tick even when the vCPUs are
1788     /// suspended.
1789     #[allow(unused_variables)]
execute( &self, vm: &impl Vm, disk_host_tubes: &[Tube], snd_host_tubes: &[Tube], pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>, gpu_control_tube: Option<&Tube>, usb_control_tube: Option<&Tube>, bat_control: &mut Option<BatControl>, kick_vcpus: impl Fn(VcpuControl), #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl), force_s2idle: bool, #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, device_control_tube: &Tube, vcpu_size: usize, irq_handler_control: &Tube, snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>, suspended_pvclock_state: &mut Option<hypervisor::ClockState>, ) -> VmResponse1790     pub fn execute(
1791         &self,
1792         vm: &impl Vm,
1793         disk_host_tubes: &[Tube],
1794         snd_host_tubes: &[Tube],
1795         pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
1796         gpu_control_tube: Option<&Tube>,
1797         usb_control_tube: Option<&Tube>,
1798         bat_control: &mut Option<BatControl>,
1799         kick_vcpus: impl Fn(VcpuControl),
1800         #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl),
1801         force_s2idle: bool,
1802         #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
1803         device_control_tube: &Tube,
1804         vcpu_size: usize,
1805         irq_handler_control: &Tube,
1806         snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
1807         suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
1808     ) -> VmResponse {
1809         match self {
1810             VmRequest::Exit => {
1811                 panic!("VmRequest::Exit should be handled by the platform run loop");
1812             }
1813             VmRequest::Powerbtn => {
1814                 if let Some(pm) = pm {
1815                     pm.lock().pwrbtn_evt();
1816                     VmResponse::Ok
1817                 } else {
1818                     error!("{:#?} not supported", *self);
1819                     VmResponse::Err(SysError::new(ENOTSUP))
1820                 }
1821             }
1822             VmRequest::Sleepbtn => {
1823                 if let Some(pm) = pm {
1824                     pm.lock().slpbtn_evt();
1825                     VmResponse::Ok
1826                 } else {
1827                     error!("{:#?} not supported", *self);
1828                     VmResponse::Err(SysError::new(ENOTSUP))
1829                 }
1830             }
1831             VmRequest::Rtc { clear_evt } => {
1832                 if let Some(pm) = pm.as_ref() {
1833                     match clear_evt.try_clone() {
1834                         Ok(clear_evt) => {
1835                             // RTC event will asynchronously trigger wakeup.
1836                             pm.lock().rtc_evt(clear_evt);
1837                             VmResponse::Ok
1838                         }
1839                         Err(err) => {
1840                             error!("Error cloning clear_evt: {:?}", err);
1841                             VmResponse::Err(SysError::new(EIO))
1842                         }
1843                     }
1844                 } else {
1845                     error!("{:#?} not supported", *self);
1846                     VmResponse::Err(SysError::new(ENOTSUP))
1847                 }
1848             }
1849             VmRequest::SuspendVcpus => {
1850                 if !force_s2idle {
1851                     kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1852                     let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
1853                         Ok(state) => state,
1854                         Err(e) => {
1855                             error!("failed to get vcpu state: {e}");
1856                             return VmResponse::Err(SysError::new(EIO));
1857                         }
1858                     };
1859                     if current_mode != VmRunMode::Suspending {
1860                         error!("vCPUs failed to all suspend.");
1861                         return VmResponse::Err(SysError::new(EIO));
1862                     }
1863                 }
1864                 VmResponse::Ok
1865             }
1866             VmRequest::ResumeVcpus => {
1867                 if let Err(e) = device_control_tube.send(&DeviceControlCommand::GetDevicesState) {
1868                     error!("failed to send GetDevicesState: {}", e);
1869                     return VmResponse::Err(SysError::new(EIO));
1870                 }
1871                 let devices_state = match device_control_tube.recv() {
1872                     Ok(VmResponse::DevicesState(state)) => state,
1873                     Ok(resp) => {
1874                         error!("failed to get devices state. Unexpected behavior: {}", resp);
1875                         return VmResponse::Err(SysError::new(EINVAL));
1876                     }
1877                     Err(e) => {
1878                         error!("failed to get devices state. Unexpected behavior: {}", e);
1879                         return VmResponse::Err(SysError::new(EINVAL));
1880                     }
1881                 };
1882                 if let DevicesState::Sleep = devices_state {
1883                     error!("Trying to wake Vcpus while Devices are asleep. Did you mean to use `crosvm resume --full`?");
1884                     return VmResponse::Err(SysError::new(EINVAL));
1885                 }
1886 
1887                 if force_s2idle {
1888                     // During resume also emulate powerbtn event which will allow to wakeup fully
1889                     // suspended guest.
1890                     if let Some(pm) = pm {
1891                         pm.lock().pwrbtn_evt();
1892                     } else {
1893                         error!("triggering power btn during resume not supported");
1894                         return VmResponse::Err(SysError::new(ENOTSUP));
1895                     }
1896                 }
1897 
1898                 kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
1899                 VmResponse::Ok
1900             }
1901             VmRequest::Swap(SwapCommand::Enable) => {
1902                 #[cfg(feature = "swap")]
1903                 if let Some(swap_controller) = swap_controller {
1904                     // Suspend all vcpus and devices while vmm-swap is enabling (move the guest
1905                     // memory contents to the staging memory) to guarantee no processes other than
1906                     // the swap monitor process access the guest memory.
1907                     let _vcpu_guard = match VcpuSuspendGuard::new(&kick_vcpus, vcpu_size) {
1908                         Ok(guard) => guard,
1909                         Err(e) => {
1910                             error!("failed to suspend vcpus: {:?}", e);
1911                             return VmResponse::Err(SysError::new(EINVAL));
1912                         }
1913                     };
1914                     // TODO(b/253386409): Use `devices::Suspendable::sleep()` instead of sending
1915                     // `SIGSTOP` signal.
1916                     let _devices_guard = match swap_controller.suspend_devices() {
1917                         Ok(guard) => guard,
1918                         Err(e) => {
1919                             error!("failed to suspend devices: {:?}", e);
1920                             return VmResponse::Err(SysError::new(EINVAL));
1921                         }
1922                     };
1923 
1924                     return match swap_controller.enable() {
1925                         Ok(()) => VmResponse::Ok,
1926                         Err(e) => {
1927                             error!("swap enable failed: {}", e);
1928                             VmResponse::Err(SysError::new(EINVAL))
1929                         }
1930                     };
1931                 }
1932                 VmResponse::Err(SysError::new(ENOTSUP))
1933             }
1934             VmRequest::Swap(SwapCommand::Trim) => {
1935                 #[cfg(feature = "swap")]
1936                 if let Some(swap_controller) = swap_controller {
1937                     return match swap_controller.trim() {
1938                         Ok(()) => VmResponse::Ok,
1939                         Err(e) => {
1940                             error!("swap trim failed: {}", e);
1941                             VmResponse::Err(SysError::new(EINVAL))
1942                         }
1943                     };
1944                 }
1945                 VmResponse::Err(SysError::new(ENOTSUP))
1946             }
1947             VmRequest::Swap(SwapCommand::SwapOut) => {
1948                 #[cfg(feature = "swap")]
1949                 if let Some(swap_controller) = swap_controller {
1950                     return match swap_controller.swap_out() {
1951                         Ok(()) => VmResponse::Ok,
1952                         Err(e) => {
1953                             error!("swap out failed: {}", e);
1954                             VmResponse::Err(SysError::new(EINVAL))
1955                         }
1956                     };
1957                 }
1958                 VmResponse::Err(SysError::new(ENOTSUP))
1959             }
1960             VmRequest::Swap(SwapCommand::Disable {
1961                 #[cfg(feature = "swap")]
1962                 slow_file_cleanup,
1963                 ..
1964             }) => {
1965                 #[cfg(feature = "swap")]
1966                 if let Some(swap_controller) = swap_controller {
1967                     return match swap_controller.disable(*slow_file_cleanup) {
1968                         Ok(()) => VmResponse::Ok,
1969                         Err(e) => {
1970                             error!("swap disable failed: {}", e);
1971                             VmResponse::Err(SysError::new(EINVAL))
1972                         }
1973                     };
1974                 }
1975                 VmResponse::Err(SysError::new(ENOTSUP))
1976             }
1977             VmRequest::Swap(SwapCommand::Status) => {
1978                 #[cfg(feature = "swap")]
1979                 if let Some(swap_controller) = swap_controller {
1980                     return match swap_controller.status() {
1981                         Ok(status) => VmResponse::SwapStatus(status),
1982                         Err(e) => {
1983                             error!("swap status failed: {}", e);
1984                             VmResponse::Err(SysError::new(EINVAL))
1985                         }
1986                     };
1987                 }
1988                 VmResponse::Err(SysError::new(ENOTSUP))
1989             }
1990             VmRequest::SuspendVm => {
1991                 info!("Starting crosvm suspend");
1992                 kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1993                 let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
1994                     Ok(state) => state,
1995                     Err(e) => {
1996                         error!("failed to get vcpu state: {e}");
1997                         return VmResponse::Err(SysError::new(EIO));
1998                     }
1999                 };
2000                 if current_mode != VmRunMode::Suspending {
2001                     error!("vCPUs failed to all suspend.");
2002                     return VmResponse::Err(SysError::new(EIO));
2003                 }
2004                 // Snapshot the pvclock ASAP after stopping vCPUs.
2005                 if vm.check_capability(VmCap::PvClock) {
2006                     if suspended_pvclock_state.is_none() {
2007                         *suspended_pvclock_state = Some(match vm.get_pvclock() {
2008                             Ok(x) => x,
2009                             Err(e) => {
2010                                 error!("suspend_pvclock failed: {e:?}");
2011                                 return VmResponse::Err(SysError::new(EIO));
2012                             }
2013                         });
2014                     }
2015                 }
2016                 if let Err(e) = device_control_tube
2017                     .send(&DeviceControlCommand::SleepDevices)
2018                     .context("send command to devices control socket")
2019                 {
2020                     error!("{:?}", e);
2021                     return VmResponse::Err(SysError::new(EIO));
2022                 };
2023                 match device_control_tube
2024                     .recv()
2025                     .context("receive from devices control socket")
2026                 {
2027                     Ok(VmResponse::Ok) => {
2028                         info!("Finished crosvm suspend successfully");
2029                         VmResponse::Ok
2030                     }
2031                     Ok(resp) => {
2032                         error!("device sleep failed: {}", resp);
2033                         VmResponse::Err(SysError::new(EIO))
2034                     }
2035                     Err(e) => {
2036                         error!("receive from devices control socket: {:?}", e);
2037                         VmResponse::Err(SysError::new(EIO))
2038                     }
2039                 }
2040             }
2041             VmRequest::ResumeVm => {
2042                 info!("Starting crosvm resume");
2043                 if let Err(e) = device_control_tube
2044                     .send(&DeviceControlCommand::WakeDevices)
2045                     .context("send command to devices control socket")
2046                 {
2047                     error!("{:?}", e);
2048                     return VmResponse::Err(SysError::new(EIO));
2049                 };
2050                 match device_control_tube
2051                     .recv()
2052                     .context("receive from devices control socket")
2053                 {
2054                     Ok(VmResponse::Ok) => {
2055                         info!("Finished crosvm resume successfully");
2056                     }
2057                     Ok(resp) => {
2058                         error!("device wake failed: {}", resp);
2059                         return VmResponse::Err(SysError::new(EIO));
2060                     }
2061                     Err(e) => {
2062                         error!("receive from devices control socket: {:?}", e);
2063                         return VmResponse::Err(SysError::new(EIO));
2064                     }
2065                 }
2066                 // Resume the pvclock as late as possible before starting vCPUs.
2067                 if vm.check_capability(VmCap::PvClock) {
2068                     // If None, then we aren't suspended, which is a valid case.
2069                     if let Some(x) = suspended_pvclock_state {
2070                         if let Err(e) = vm.set_pvclock(x) {
2071                             error!("resume_pvclock failed: {e:?}");
2072                             return VmResponse::Err(SysError::new(EIO));
2073                         }
2074                     }
2075                 }
2076                 kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2077                 VmResponse::Ok
2078             }
2079             VmRequest::Gpe { gpe, clear_evt } => {
2080                 if let Some(pm) = pm.as_ref() {
2081                     match clear_evt.as_ref().map(|e| e.try_clone()).transpose() {
2082                         Ok(clear_evt) => {
2083                             pm.lock().gpe_evt(*gpe, clear_evt);
2084                             VmResponse::Ok
2085                         }
2086                         Err(err) => {
2087                             error!("Error cloning clear_evt: {:?}", err);
2088                             VmResponse::Err(SysError::new(EIO))
2089                         }
2090                     }
2091                 } else {
2092                     error!("{:#?} not supported", *self);
2093                     VmResponse::Err(SysError::new(ENOTSUP))
2094                 }
2095             }
2096             VmRequest::PciPme(requester_id) => {
2097                 if let Some(pm) = pm.as_ref() {
2098                     pm.lock().pme_evt(*requester_id);
2099                     VmResponse::Ok
2100                 } else {
2101                     error!("{:#?} not supported", *self);
2102                     VmResponse::Err(SysError::new(ENOTSUP))
2103                 }
2104             }
2105             VmRequest::MakeRT => {
2106                 kick_vcpus(VcpuControl::MakeRT);
2107                 VmResponse::Ok
2108             }
2109             #[cfg(feature = "balloon")]
2110             VmRequest::BalloonCommand(_) => unreachable!("Should be handled with BalloonTube"),
2111             VmRequest::DiskCommand {
2112                 disk_index,
2113                 ref command,
2114             } => match &disk_host_tubes.get(*disk_index) {
2115                 Some(tube) => handle_disk_command(command, tube),
2116                 None => VmResponse::Err(SysError::new(ENODEV)),
2117             },
2118             #[cfg(feature = "gpu")]
2119             VmRequest::GpuCommand(ref cmd) => match gpu_control_tube {
2120                 Some(gpu_control) => {
2121                     let res = gpu_control.send(cmd);
2122                     if let Err(e) = res {
2123                         error!("fail to send command to gpu control socket: {}", e);
2124                         return VmResponse::Err(SysError::new(EIO));
2125                     }
2126                     match gpu_control.recv() {
2127                         Ok(response) => VmResponse::GpuResponse(response),
2128                         Err(e) => {
2129                             error!("fail to recv command from gpu control socket: {}", e);
2130                             VmResponse::Err(SysError::new(EIO))
2131                         }
2132                     }
2133                 }
2134                 None => {
2135                     error!("gpu control is not enabled in crosvm");
2136                     VmResponse::Err(SysError::new(EIO))
2137                 }
2138             },
2139             VmRequest::UsbCommand(ref cmd) => {
2140                 let usb_control_tube = match usb_control_tube {
2141                     Some(t) => t,
2142                     None => {
2143                         error!("attempted to execute USB request without control tube");
2144                         return VmResponse::Err(SysError::new(ENODEV));
2145                     }
2146                 };
2147                 let res = usb_control_tube.send(cmd);
2148                 if let Err(e) = res {
2149                     error!("fail to send command to usb control socket: {}", e);
2150                     return VmResponse::Err(SysError::new(EIO));
2151                 }
2152                 match usb_control_tube.recv() {
2153                     Ok(response) => VmResponse::UsbResponse(response),
2154                     Err(e) => {
2155                         error!("fail to recv command from usb control socket: {}", e);
2156                         VmResponse::Err(SysError::new(EIO))
2157                     }
2158                 }
2159             }
2160             VmRequest::BatCommand(type_, ref cmd) => {
2161                 match bat_control {
2162                     Some(battery) => {
2163                         if battery.type_ != *type_ {
2164                             error!("ignored battery command due to battery type: expected {:?}, got {:?}", battery.type_, type_);
2165                             return VmResponse::Err(SysError::new(EINVAL));
2166                         }
2167 
2168                         let res = battery.control_tube.send(cmd);
2169                         if let Err(e) = res {
2170                             error!("fail to send command to bat control socket: {}", e);
2171                             return VmResponse::Err(SysError::new(EIO));
2172                         }
2173 
2174                         match battery.control_tube.recv() {
2175                             Ok(response) => VmResponse::BatResponse(response),
2176                             Err(e) => {
2177                                 error!("fail to recv command from bat control socket: {}", e);
2178                                 VmResponse::Err(SysError::new(EIO))
2179                             }
2180                         }
2181                     }
2182                     None => VmResponse::BatResponse(BatControlResult::NoBatDevice),
2183                 }
2184             }
2185             #[cfg(feature = "audio")]
2186             VmRequest::SndCommand(ref cmd) => match cmd {
2187                 SndControlCommand::MuteAll(muted) => {
2188                     for tube in snd_host_tubes {
2189                         let res = tube.send(&SndControlCommand::MuteAll(*muted));
2190                         if let Err(e) = res {
2191                             error!("fail to send command to snd control socket: {}", e);
2192                             return VmResponse::Err(SysError::new(EIO));
2193                         }
2194 
2195                         match tube.recv() {
2196                             Ok(VmResponse::Ok) => {
2197                                 debug!("device is successfully muted");
2198                             }
2199                             Ok(resp) => {
2200                                 error!("mute failed: {}", resp);
2201                                 return VmResponse::ErrString("fail to mute the device".to_owned());
2202                             }
2203                             Err(e) => return VmResponse::Err(SysError::new(EIO)),
2204                         }
2205                     }
2206                     VmResponse::Ok
2207                 }
2208             },
2209             VmRequest::HotPlugVfioCommand { device: _, add: _ } => VmResponse::Ok,
2210             #[cfg(feature = "pci-hotplug")]
2211             VmRequest::HotPlugNetCommand(ref _net_cmd) => {
2212                 VmResponse::ErrString("hot plug not supported".to_owned())
2213             }
2214             VmRequest::Snapshot(SnapshotCommand::Take {
2215                 ref snapshot_path,
2216                 compress_memory,
2217                 encrypt,
2218             }) => {
2219                 info!("Starting crosvm snapshot");
2220                 match do_snapshot(
2221                     snapshot_path.to_path_buf(),
2222                     kick_vcpus,
2223                     irq_handler_control,
2224                     device_control_tube,
2225                     vcpu_size,
2226                     snapshot_irqchip,
2227                     *compress_memory,
2228                     *encrypt,
2229                     suspended_pvclock_state,
2230                     vm,
2231                 ) {
2232                     Ok(()) => {
2233                         info!("Finished crosvm snapshot successfully");
2234                         VmResponse::Ok
2235                     }
2236                     Err(e) => {
2237                         error!("failed to handle snapshot: {:?}", e);
2238                         VmResponse::Err(SysError::new(EIO))
2239                     }
2240                 }
2241             }
2242             VmRequest::RegisterListener {
2243                 socket_addr: _,
2244                 event: _,
2245             } => VmResponse::Ok,
2246             VmRequest::UnregisterListener {
2247                 socket_addr: _,
2248                 event: _,
2249             } => VmResponse::Ok,
2250             VmRequest::Unregister { socket_addr: _ } => VmResponse::Ok,
2251             VmRequest::VcpuPidTid => unreachable!(),
2252             VmRequest::Throttle(_, _) => unreachable!(),
2253             VmRequest::GetVmDescriptor => {
2254                 let vm_fd = match vm.try_clone_descriptor() {
2255                     Ok(vm_fd) => vm_fd,
2256                     Err(e) => {
2257                         error!("failed to get vm_fd: {:?}", e);
2258                         return VmResponse::Err(e);
2259                     }
2260                 };
2261                 VmResponse::VmDescriptor {
2262                     hypervisor: vm.hypervisor_kind(),
2263                     vm_fd,
2264                 }
2265             }
2266         }
2267     }
2268 }
2269 
2270 /// Snapshot the VM to file at `snapshot_path`
do_snapshot( snapshot_path: PathBuf, kick_vcpus: impl Fn(VcpuControl), irq_handler_control: &Tube, device_control_tube: &Tube, vcpu_size: usize, snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>, compress_memory: bool, encrypt: bool, suspended_pvclock_state: &mut Option<hypervisor::ClockState>, vm: &impl Vm, ) -> anyhow::Result<()>2271 fn do_snapshot(
2272     snapshot_path: PathBuf,
2273     kick_vcpus: impl Fn(VcpuControl),
2274     irq_handler_control: &Tube,
2275     device_control_tube: &Tube,
2276     vcpu_size: usize,
2277     snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
2278     compress_memory: bool,
2279     encrypt: bool,
2280     suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2281     vm: &impl Vm,
2282 ) -> anyhow::Result<()> {
2283     let snapshot_start = Instant::now();
2284 
2285     let _vcpu_guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size)?;
2286     let _device_guard = DeviceSleepGuard::new(device_control_tube)?;
2287 
2288     // We want to flush all pending IRQs to the LAPICs. There are two cases:
2289     //
2290     // MSIs: these are directly delivered to the LAPIC. We must verify the handler
2291     // thread cycles once to deliver these interrupts.
2292     //
2293     // Legacy interrupts: in the case of a split IRQ chip, these interrupts may
2294     // flow through the userspace IOAPIC. If the hypervisor does not support
2295     // irqfds (e.g. WHPX), a single iteration will only flush the IRQ to the
2296     // IOAPIC. The underlying MSI will be asserted at this point, but if the
2297     // IRQ handler doesn't run another iteration, it won't be delivered to the
2298     // LAPIC. This is why we cycle the handler thread twice (doing so ensures we
2299     // process the underlying MSI).
2300     //
2301     // We can handle both of these cases by iterating until there are no tokens
2302     // serviced on the requested iteration. Note that in the legacy case, this
2303     // ensures at least two iterations.
2304     //
2305     // Note: within CrosVM, *all* interrupts are eventually converted into the
2306     // same mechanicism that MSIs use. This is why we say "underlying" MSI for
2307     // a legacy IRQ.
2308     let mut flush_attempts = 0;
2309     loop {
2310         irq_handler_control
2311             .send(&IrqHandlerRequest::WakeAndNotifyIteration)
2312             .context("failed to send flush command to IRQ handler thread")?;
2313         let resp = irq_handler_control
2314             .recv()
2315             .context("failed to recv flush response from IRQ handler thread")?;
2316         match resp {
2317             IrqHandlerResponse::HandlerIterationComplete(tokens_serviced) => {
2318                 if tokens_serviced == 0 {
2319                     break;
2320                 }
2321             }
2322             _ => bail!("received unexpected reply from IRQ handler: {:?}", resp),
2323         }
2324         flush_attempts += 1;
2325         if flush_attempts > EXPECTED_MAX_IRQ_FLUSH_ITERATIONS {
2326             warn!("flushing IRQs for snapshot may be stalled after iteration {}, expected <= {} iterations", flush_attempts, EXPECTED_MAX_IRQ_FLUSH_ITERATIONS);
2327         }
2328     }
2329     info!("flushed IRQs in {} iterations", flush_attempts);
2330 
2331     let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
2332 
2333     // Snapshot hypervisor's paravirtualized clock.
2334     snapshot_writer.write_fragment("pvclock", &AnySnapshot::to_any(suspended_pvclock_state)?)?;
2335 
2336     // Snapshot Vcpus
2337     info!("VCPUs snapshotting...");
2338     let (send_chan, recv_chan) = mpsc::channel();
2339     kick_vcpus(VcpuControl::Snapshot(
2340         snapshot_writer.add_namespace("vcpu")?,
2341         send_chan,
2342     ));
2343     // Validate all Vcpus snapshot successfully
2344     for _ in 0..vcpu_size {
2345         recv_chan
2346             .recv()
2347             .context("Failed to recv Vcpu snapshot response")?
2348             .context("Failed to snapshot Vcpu")?;
2349     }
2350     info!("VCPUs snapshotted.");
2351 
2352     // Snapshot irqchip
2353     info!("Snapshotting irqchip...");
2354     let irqchip_snap = snapshot_irqchip()?;
2355     snapshot_writer
2356         .write_fragment("irqchip", &irqchip_snap)
2357         .context("Failed to write irqchip state")?;
2358     info!("Snapshotted irqchip.");
2359 
2360     // Snapshot devices
2361     info!("Devices snapshotting...");
2362     device_control_tube
2363         .send(&DeviceControlCommand::SnapshotDevices {
2364             snapshot_writer,
2365             compress_memory,
2366         })
2367         .context("send command to devices control socket")?;
2368     let resp: VmResponse = device_control_tube
2369         .recv()
2370         .context("receive from devices control socket")?;
2371     if !matches!(resp, VmResponse::Ok) {
2372         bail!("unexpected SnapshotDevices response: {resp}");
2373     }
2374     info!("Devices snapshotted.");
2375 
2376     let snap_duration_ms = snapshot_start.elapsed().as_millis();
2377     info!(
2378         "snapshot: completed snapshot in {}ms; VM mem size: {}MB",
2379         snap_duration_ms,
2380         vm.get_memory().memory_size() / 1024 / 1024,
2381     );
2382     metrics::log_metric_with_details(
2383         metrics::MetricEventType::SnapshotSaveOverallLatency,
2384         snap_duration_ms as i64,
2385         &metrics_events::RecordDetails {},
2386     );
2387     Ok(())
2388 }
2389 
2390 /// Restore the VM to the snapshot at `restore_path`.
2391 ///
2392 /// Same as `VmRequest::execute` with a `VmRequest::Restore`. Exposed as a separate function
2393 /// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
do_restore( restore_path: &Path, kick_vcpus: impl Fn(VcpuControl), kick_vcpu: impl Fn(VcpuControl, usize), irq_handler_control: &Tube, device_control_tube: &Tube, vcpu_size: usize, mut restore_irqchip: impl FnMut(AnySnapshot) -> anyhow::Result<()>, require_encrypted: bool, suspended_pvclock_state: &mut Option<hypervisor::ClockState>, vm: &impl Vm, ) -> anyhow::Result<()>2394 pub fn do_restore(
2395     restore_path: &Path,
2396     kick_vcpus: impl Fn(VcpuControl),
2397     kick_vcpu: impl Fn(VcpuControl, usize),
2398     irq_handler_control: &Tube,
2399     device_control_tube: &Tube,
2400     vcpu_size: usize,
2401     mut restore_irqchip: impl FnMut(AnySnapshot) -> anyhow::Result<()>,
2402     require_encrypted: bool,
2403     suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2404     vm: &impl Vm,
2405 ) -> anyhow::Result<()> {
2406     let restore_start = Instant::now();
2407     let _guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size);
2408     let _devices_guard = DeviceSleepGuard::new(device_control_tube)?;
2409 
2410     let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
2411 
2412     // Restore hypervisor's paravirtualized clock.
2413     *suspended_pvclock_state = snapshot_reader.read_fragment("pvclock")?;
2414 
2415     // Restore IrqChip
2416     let irq_snapshot: AnySnapshot = snapshot_reader.read_fragment("irqchip")?;
2417     restore_irqchip(irq_snapshot)?;
2418 
2419     // Restore Vcpu(s)
2420     let vcpu_snapshot_reader = snapshot_reader.namespace("vcpu")?;
2421     let vcpu_snapshot_count = vcpu_snapshot_reader.list_fragments()?.len();
2422     if vcpu_snapshot_count != vcpu_size {
2423         bail!(
2424             "bad cpu count in snapshot: expected={} got={}",
2425             vcpu_size,
2426             vcpu_snapshot_count,
2427         );
2428     }
2429     #[cfg(target_arch = "x86_64")]
2430     let host_tsc_reference_moment = {
2431         // SAFETY: rdtsc takes no arguments.
2432         unsafe { _rdtsc() }
2433     };
2434     let (send_chan, recv_chan) = mpsc::channel();
2435     for vcpu_id in 0..vcpu_size {
2436         kick_vcpu(
2437             VcpuControl::Restore(VcpuRestoreRequest {
2438                 result_sender: send_chan.clone(),
2439                 snapshot_reader: vcpu_snapshot_reader.clone(),
2440                 #[cfg(target_arch = "x86_64")]
2441                 host_tsc_reference_moment,
2442             }),
2443             vcpu_id,
2444         );
2445     }
2446     for _ in 0..vcpu_size {
2447         recv_chan
2448             .recv()
2449             .context("Failed to recv restore response")?
2450             .context("Failed to restore vcpu")?;
2451     }
2452 
2453     // Restore devices
2454     device_control_tube
2455         .send(&DeviceControlCommand::RestoreDevices { snapshot_reader })
2456         .context("send command to devices control socket")?;
2457     let resp: VmResponse = device_control_tube
2458         .recv()
2459         .context("receive from devices control socket")?;
2460     if !matches!(resp, VmResponse::Ok) {
2461         bail!("unexpected RestoreDevices response: {resp}");
2462     }
2463 
2464     irq_handler_control
2465         .send(&IrqHandlerRequest::RefreshIrqEventTokens)
2466         .context("failed to send refresh irq event token command to IRQ handler thread")?;
2467     let resp: IrqHandlerResponse = irq_handler_control
2468         .recv()
2469         .context("failed to recv refresh response from IRQ handler thread")?;
2470     if !matches!(resp, IrqHandlerResponse::IrqEventTokenRefreshComplete) {
2471         bail!(
2472             "received unexpected reply from IRQ handler thread: {:?}",
2473             resp
2474         );
2475     }
2476 
2477     let restore_duration_ms = restore_start.elapsed().as_millis();
2478     info!(
2479         "snapshot: completed restore in {}ms; mem size: {}",
2480         restore_duration_ms,
2481         vm.get_memory().memory_size(),
2482     );
2483     metrics::log_metric_with_details(
2484         metrics::MetricEventType::SnapshotRestoreOverallLatency,
2485         restore_duration_ms as i64,
2486         &metrics_events::RecordDetails {},
2487     );
2488     Ok(())
2489 }
2490 
/// Alias for `hypervisor::HypervisorKind`, the type of the `hypervisor` field in
/// `VmResponse::VmDescriptor`.
pub type HypervisorKind = hypervisor::HypervisorKind;
2492 
/// Indication of success or failure of a `VmRequest`.
///
/// Success is usually indicated by `VmResponse::Ok` unless there is data associated with the
/// response, in which case one of the data-carrying variants is returned instead.
#[derive(Serialize, Deserialize, Debug)]
#[must_use]
pub enum VmResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution, reported as a `SysError`.
    Err(SysError),
    /// Indicates the request encountered some error during execution, reported as a message.
    ErrString(String),
    /// The memory was registered into guest address space in memory slot number `slot`.
    RegisterMemory { slot: u32 },
    /// Results of balloon control commands.
    #[cfg(feature = "balloon")]
    BalloonStats {
        stats: balloon_control::BalloonStats,
        balloon_actual: u64,
    },
    /// Results of balloon WS-R command
    #[cfg(feature = "balloon")]
    BalloonWS {
        ws: balloon_control::BalloonWS,
        balloon_actual: u64,
    },
    /// Results of PCI hot plug
    #[cfg(feature = "pci-hotplug")]
    PciHotPlugResponse { bus: u8 },
    /// Results of usb control commands.
    UsbResponse(UsbControlResult),
    #[cfg(feature = "gpu")]
    /// Results of gpu control commands.
    GpuResponse(GpuControlResult),
    /// Results of battery control commands.
    BatResponse(BatControlResult),
    /// Results of swap status command.
    SwapStatus(SwapStatus),
    /// Gets the state of Devices (sleep/wake)
    DevicesState(DevicesState),
    /// Map of the Vcpu PID/TIDs
    VcpuPidTidResponse {
        pid_tid_map: BTreeMap<usize, (u32, u32)>,
    },
    /// Answer to `VmRequest::GetVmDescriptor`: a clone of the VM's descriptor together with the
    /// kind of hypervisor the VM reports.
    VmDescriptor {
        hypervisor: HypervisorKind,
        vm_fd: SafeDescriptor,
    },
}
2542 
2543 impl Display for VmResponse {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result2544     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2545         use self::VmResponse::*;
2546 
2547         match self {
2548             Ok => write!(f, "ok"),
2549             Err(e) => write!(f, "error: {}", e),
2550             ErrString(e) => write!(f, "error: {}", e),
2551             RegisterMemory { slot } => write!(f, "memory registered in slot {}", slot),
2552             #[cfg(feature = "balloon")]
2553             VmResponse::BalloonStats {
2554                 stats,
2555                 balloon_actual,
2556             } => {
2557                 write!(
2558                     f,
2559                     "stats: {}\nballoon_actual: {}",
2560                     serde_json::to_string_pretty(&stats)
2561                         .unwrap_or_else(|_| "invalid_response".to_string()),
2562                     balloon_actual
2563                 )
2564             }
2565             #[cfg(feature = "balloon")]
2566             VmResponse::BalloonWS { ws, balloon_actual } => {
2567                 write!(
2568                     f,
2569                     "ws: {}, balloon_actual: {}",
2570                     serde_json::to_string_pretty(&ws)
2571                         .unwrap_or_else(|_| "invalid_response".to_string()),
2572                     balloon_actual,
2573                 )
2574             }
2575             UsbResponse(result) => write!(f, "usb control request get result {:?}", result),
2576             #[cfg(feature = "pci-hotplug")]
2577             PciHotPlugResponse { bus } => write!(f, "pci hotplug bus {:?}", bus),
2578             #[cfg(feature = "gpu")]
2579             GpuResponse(result) => write!(f, "gpu control request result {:?}", result),
2580             BatResponse(result) => write!(f, "{}", result),
2581             SwapStatus(status) => {
2582                 write!(
2583                     f,
2584                     "{}",
2585                     serde_json::to_string(&status)
2586                         .unwrap_or_else(|_| "invalid_response".to_string()),
2587                 )
2588             }
2589             DevicesState(status) => write!(f, "devices status: {:?}", status),
2590             VcpuPidTidResponse { pid_tid_map } => write!(f, "vcpu pid tid map: {:?}", pid_tid_map),
2591             VmDescriptor { hypervisor, vm_fd } => {
2592                 write!(f, "hypervisor: {:?}, vm_fd: {:?}", hypervisor, vm_fd)
2593             }
2594         }
2595     }
2596 }
2597 
/// Enum that allows remote control of a wait context (used between the Windows GpuDisplay & the
/// GPU worker).
#[derive(Serialize, Deserialize)]
pub enum ModifyWaitContext {
    /// Carries the descriptor to add; it is (de)serialized through `with_as_descriptor`.
    Add(#[serde(with = "with_as_descriptor")] Descriptor),
}
2604 
/// Errors related to virtio-iommu VFIO commands.
///
/// The variants are kept in sorted order (enforced by `#[sorted]`).
#[sorted]
#[derive(Error, Debug)]
pub enum VirtioIOMMUVfioError {
    /// Displayed as "socket failed".
    #[error("socket failed")]
    SocketFailed,
    /// A `VirtioIOMMUResponse` other than the expected one was received.
    #[error("unexpected response: {0}")]
    UnexpectedResponse(VirtioIOMMUResponse),
    /// The command named by the contained string is not known.
    #[error("unknown command: `{0}`")]
    UnknownCommand(String),
    /// Carries a `VirtioIOMMUVfioResult`, whose own `Display` text is used as the message.
    #[error("{0}")]
    VfioControl(VirtioIOMMUVfioResult),
}
2617 
/// VFIO-related commands carried by `VirtioIOMMURequest::VfioCommand`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioCommand {
    /// Add the vfio device attached to virtio-iommu.
    VfioDeviceAdd {
        endpoint_addr: u32,
        wrapper_id: u32,
        /// (De)serialized through `with_as_descriptor`.
        #[serde(with = "with_as_descriptor")]
        container: File,
    },
    /// Delete the vfio device attached to virtio-iommu.
    VfioDeviceDel {
        endpoint_addr: u32,
    },
    /// Map a dma-buf into vfio iommu table
    VfioDmabufMap {
        region_id: VmMemoryRegionId,
        gpa: u64,
        size: u64,
        dma_buf: SafeDescriptor,
    },
    /// Unmap a dma-buf from vfio iommu table
    VfioDmabufUnmap(VmMemoryRegionId),
}
2641 
/// Outcome of a `VirtioIOMMUVfioCommand`, returned inside `VirtioIOMMUResponse::VfioResponse`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioResult {
    /// The command succeeded.
    Ok,
    /// Not in the PCI ranges of virtio-iommu.
    NotInPCIRanges,
    /// No vfio container is available.
    NoAvailableContainer,
    /// No such vfio device.
    NoSuchDevice,
    /// No such mapped dma-buf.
    NoSuchMappedDmabuf,
    /// The parameters were invalid.
    InvalidParam,
}
2651 
2652 impl Display for VirtioIOMMUVfioResult {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result2653     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2654         use self::VirtioIOMMUVfioResult::*;
2655 
2656         match self {
2657             Ok => write!(f, "successfully"),
2658             NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
2659             NoAvailableContainer => write!(f, "no available vfio container"),
2660             NoSuchDevice => write!(f, "no such a vfio device"),
2661             NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
2662             InvalidParam => write!(f, "invalid parameters"),
2663         }
2664     }
2665 }
2666 
/// A request to the virtio-iommu process to perform some operations.
///
/// Unless otherwise noted, each request should expect a `VirtioIOMMUResponse::Ok` to be received on
/// success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMURequest {
    /// Command for vfio related operations; wraps a `VirtioIOMMUVfioCommand`.
    VfioCommand(VirtioIOMMUVfioCommand),
}
2676 
/// Indication of success or failure of a `VirtioIOMMURequest`.
///
/// Success is usually indicated by `VirtioIOMMUResponse::Ok` unless there is data associated
/// with the response.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution.
    Err(SysError),
    /// Results for Vfio commands.
    VfioResponse(VirtioIOMMUVfioResult),
}
2690 
2691 impl Display for VirtioIOMMUResponse {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result2692     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2693         use self::VirtioIOMMUResponse::*;
2694         match self {
2695             Ok => write!(f, "ok"),
2696             Err(e) => write!(f, "error: {}", e),
2697             VfioResponse(result) => write!(
2698                 f,
2699                 "The vfio-related virtio-iommu request got result: {:?}",
2700                 result
2701             ),
2702         }
2703     }
2704 }
2705 
2706 /// Send VirtioIOMMURequest without waiting for the response
virtio_iommu_request_async( iommu_control_tube: &Tube, req: &VirtioIOMMURequest, ) -> VirtioIOMMUResponse2707 pub fn virtio_iommu_request_async(
2708     iommu_control_tube: &Tube,
2709     req: &VirtioIOMMURequest,
2710 ) -> VirtioIOMMUResponse {
2711     match iommu_control_tube.send(&req) {
2712         Ok(_) => VirtioIOMMUResponse::Ok,
2713         Err(e) => {
2714             error!("virtio-iommu socket send failed: {:?}", e);
2715             VirtioIOMMUResponse::Err(SysError::last())
2716         }
2717     }
2718 }
2719 
/// Result type returned by `virtio_iommu_request`.
///
/// The error type is the unit type `()`, so an `Err` value carries no further detail.
pub type VirtioIOMMURequestResult = std::result::Result<VirtioIOMMUResponse, ()>;
2721 
2722 /// Send VirtioIOMMURequest and wait to get the response
virtio_iommu_request( iommu_control_tube: &Tube, req: &VirtioIOMMURequest, ) -> VirtioIOMMURequestResult2723 pub fn virtio_iommu_request(
2724     iommu_control_tube: &Tube,
2725     req: &VirtioIOMMURequest,
2726 ) -> VirtioIOMMURequestResult {
2727     let response = match virtio_iommu_request_async(iommu_control_tube, req) {
2728         VirtioIOMMUResponse::Ok => match iommu_control_tube.recv() {
2729             Ok(response) => response,
2730             Err(e) => {
2731                 error!("virtio-iommu socket recv failed: {:?}", e);
2732                 VirtioIOMMUResponse::Err(SysError::last())
2733             }
2734         },
2735         resp => resp,
2736     };
2737     Ok(response)
2738 }
2739