// Copyright 2019 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::BTreeMap;
use std::fs::File;
use std::io;
use std::mem::size_of;
use std::time::Duration;

use anyhow::anyhow;
use anyhow::Context;
use base::error;
use base::AsRawDescriptor;
use base::Error as SysError;
use base::Event;
use base::RawDescriptor;
use base::Result as SysResult;
use base::Timer;
use base::Tube;
use base::TubeError;
use base::WorkerThread;
use cros_async::select2;
use cros_async::select3;
use cros_async::AsyncError;
use cros_async::EventAsync;
use cros_async::Executor;
use cros_async::TimerAsync;
use data_model::Le32;
use data_model::Le64;
use futures::pin_mut;
use remain::sorted;
use snapshot::AnySnapshot;
use thiserror::Error;
use vm_control::MemSlot;
use vm_control::VmMemoryMappingRequest;
use vm_control::VmMemoryMappingResponse;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use zerocopy::FromBytes;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

use super::async_utils;
use super::copy_config;
use super::DescriptorChain;
use super::DeviceType;
use super::Interrupt;
use super::Queue;
use super::VirtioDevice;

const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

/* Feature bits */
const VIRTIO_PMEM_F_DISCARD: u32 = 63;

const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_REQ_TYPE_DISCARD: u32 = u32::MAX;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
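
// Note: the virtio spec only defines the flush request for virtio-pmem; the
// discard feature bit and request type above appear to be crosvm-specific
// extensions, which would explain the out-of-band values (bit 63, `u32::MAX`).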

#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_config {
    start_address: Le64,
    size: Le64,
}

#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_resp {
    status_code: Le32,
}

#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_req {
    type_: Le32,
}

#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_range_req {
    type_: Le32,
    padding_: Le32,
    start_address: Le64,
    size: Le64,
}
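
// `handle_request` below distinguishes the two request layouts by the number
// of readable bytes in the descriptor chain: a bare 4-byte `virtio_pmem_req`
// carries only the request type (used for flush), while the 24-byte
// `virtio_pmem_range_req` additionally carries the range for a discard.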

#[sorted]
#[derive(Error, Debug)]
enum Error {
    /// Failed to get value from pageout timer.
    #[error("failed to get value from pageout timer: {0}")]
    PageoutTimer(AsyncError),
    /// Failed to read from virtqueue.
    #[error("failed to read from virtqueue: {0}")]
    ReadQueue(io::Error),
    /// Failed to receive tube response.
    #[error("failed to receive tube response: {0}")]
    ReceiveResponse(TubeError),
    /// Failed to send tube request.
    #[error("failed to send tube request: {0}")]
    SendingRequest(TubeError),
    /// Failed to write to virtqueue.
    #[error("failed to write to virtqueue: {0}")]
    WriteQueue(io::Error),
}

type Result<T> = ::std::result::Result<T, Error>;

/// Periodically requests, via `pmem_device_tube`, that the main process page
/// out the entire pmem mapping.
async fn pageout(
    ex: &Executor,
    swap_interval: Duration,
    pmem_device_tube: &Tube,
    mapping_arena_slot: u32,
    mapping_size: usize,
) -> Result<()> {
    let timer = Timer::new().expect("Failed to create a timer");
    let mut pageout_timer =
        TimerAsync::new(timer, ex).expect("Failed to create an async pageout timer");
    pageout_timer
        .reset_repeating(swap_interval)
        .expect("Failed to reset pageout timer");

    loop {
        pageout_timer.wait().await.map_err(Error::PageoutTimer)?;
        let request = VmMemoryMappingRequest::MadvisePageout {
            slot: mapping_arena_slot,
            offset: 0,
            size: mapping_size,
        };

        pmem_device_tube
            .send(&request)
            .map_err(Error::SendingRequest)?;
        match pmem_device_tube
            .recv::<VmMemoryMappingResponse>()
            .map_err(Error::ReceiveResponse)?
        {
            VmMemoryMappingResponse::Ok => {}
            VmMemoryMappingResponse::Err(e) => {
                error!("failed to page out the memory mapping: {}", e);
            }
        };
    }
}

/// Forwards a flush or discard request to the main process over
/// `pmem_device_tube` and returns the virtio-pmem status code for the guest.
fn execute_request(
    request_type: u32,
    start_address: u64,
    size: u64,
    pmem_device_tube: &Tube,
    mapping_arena_slot: u32,
    mapping_size: usize,
) -> u32 {
    match request_type {
        VIRTIO_PMEM_REQ_TYPE_FLUSH => {
            let request = VmMemoryMappingRequest::MsyncArena {
                slot: mapping_arena_slot,
                offset: 0, // The pmem backing file is always at offset 0 in the arena.
                size: mapping_size,
            };

            if let Err(e) = pmem_device_tube.send(&request) {
                error!("failed to send request: {}", e);
                return VIRTIO_PMEM_RESP_TYPE_EIO;
            }

            match pmem_device_tube.recv() {
                Ok(response) => match response {
                    VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
                    VmMemoryMappingResponse::Err(e) => {
                        error!("failed flushing disk image: {}", e);
                        VIRTIO_PMEM_RESP_TYPE_EIO
                    }
                },
                Err(e) => {
                    error!("failed to receive data: {}", e);
                    VIRTIO_PMEM_RESP_TYPE_EIO
                }
            }
        }

        VIRTIO_PMEM_REQ_TYPE_DISCARD => {
            let request = VmMemoryMappingRequest::MadviseRemove {
                slot: mapping_arena_slot,
                offset: usize::try_from(start_address).unwrap(),
                size: usize::try_from(size).unwrap(),
            };

            if let Err(e) = pmem_device_tube.send(&request) {
                error!("failed to send request: {}", e);
                return VIRTIO_PMEM_RESP_TYPE_EIO;
            }

            match pmem_device_tube.recv() {
                Ok(response) => match response {
                    VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
                    VmMemoryMappingResponse::Err(e) => {
                        error!("failed to discard memory range: {}", e);
                        VIRTIO_PMEM_RESP_TYPE_EIO
                    }
                },
                Err(e) => {
                    error!("failed to receive data: {}", e);
                    VIRTIO_PMEM_RESP_TYPE_EIO
                }
            }
        }

        _ => {
            error!("unknown request type: {}", request_type);
            VIRTIO_PMEM_RESP_TYPE_EIO
        }
    }
}

/// Reads a request from `avail_desc`, executes it, writes the response back
/// to the chain, and returns the number of bytes written.
fn handle_request(
    avail_desc: &mut DescriptorChain,
    pmem_device_tube: &Tube,
    mapping_arena_slot: u32,
    mapping_size: usize,
) -> Result<usize> {
    let (request_type, start_address, size) =
        if avail_desc.reader.available_bytes() == size_of::<virtio_pmem_req>() {
            let request = avail_desc
                .reader
                .read_obj::<virtio_pmem_req>()
                .map_err(Error::ReadQueue)?;
            (request.type_.to_native(), 0, 0)
        } else {
            let request = avail_desc
                .reader
                .read_obj::<virtio_pmem_range_req>()
                .map_err(Error::ReadQueue)?;
            (
                request.type_.to_native(),
                request.start_address.to_native(),
                request.size.to_native(),
            )
        };
    let status_code = execute_request(
        request_type,
        start_address,
        size,
        pmem_device_tube,
        mapping_arena_slot,
        mapping_size,
    );

    let response = virtio_pmem_resp {
        status_code: status_code.into(),
    };

    avail_desc
        .writer
        .write_obj(response)
        .map_err(Error::WriteQueue)?;

    Ok(avail_desc.writer.bytes_written())
}

/// Services requests from the virtqueue until the future is dropped.
async fn handle_queue(
    queue: &mut Queue,
    mut queue_event: EventAsync,
    pmem_device_tube: &Tube,
    mapping_arena_slot: u32,
    mapping_size: usize,
) {
    loop {
        let mut avail_desc = match queue.next_async(&mut queue_event).await {
            Err(e) => {
                error!("Failed to read descriptor: {}", e);
                return;
            }
            Ok(d) => d,
        };

        let written = match handle_request(
            &mut avail_desc,
            pmem_device_tube,
            mapping_arena_slot,
            mapping_size,
        ) {
            Ok(n) => n,
            Err(e) => {
                error!("pmem: failed to handle request: {}", e);
                0
            }
        };
        queue.add_used(avail_desc, written as u32);
        queue.trigger_interrupt();
    }
}

/// Runs the device worker: services the request queue and, when a swap
/// interval is configured, the periodic pageout task, until `kill_evt` fires.
fn run_worker(
    queue: &mut Queue,
    pmem_device_tube: &Tube,
    kill_evt: Event,
    mapping_arena_slot: u32,
    mapping_size: usize,
    swap_interval: Option<Duration>,
) {
    let ex = Executor::new().unwrap();

    let queue_evt = queue
        .event()
        .try_clone()
        .expect("failed to clone queue event");
    let queue_evt = EventAsync::new(queue_evt, &ex).expect("failed to set up the queue event");

    // Process requests from the virtio queue.
    let queue_fut = handle_queue(
        queue,
        queue_evt,
        pmem_device_tube,
        mapping_arena_slot,
        mapping_size,
    );
    pin_mut!(queue_fut);

    // Exit if the kill event is triggered.
    let kill = async_utils::await_and_exit(&ex, kill_evt);
    pin_mut!(kill);

    let interval = swap_interval.unwrap_or(Duration::ZERO);
    if interval.is_zero() {
        if let Err(e) = ex.run_until(select2(queue_fut, kill)) {
            error!("error happened in executor: {}", e);
        }
    } else {
        let pageout_fut = pageout(
            &ex,
            interval,
            pmem_device_tube,
            mapping_arena_slot,
            mapping_size,
        );
        pin_mut!(pageout_fut);
        if let Err(e) = ex.run_until(select3(queue_fut, kill, pageout_fut)) {
            error!("error happened in executor: {}", e);
        }
    }
}

/// Specifies how the memory slot is initialized.
pub enum MemSlotConfig {
    /// The memory region has already been mapped to the guest.
    MemSlot {
        /// Index of the guest-mapped memory region.
        idx: MemSlot,
    },
    /// A memory region that is not initialized yet; its slot index will be
    /// provided via `Tube` later, e.g. for the pmem-ext2 device, where the
    /// filesystem is constructed in the main process.
    LazyInit { tube: Tube },
}
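
// A minimal sketch of the `LazyInit` handshake, assuming the sender side of
// the tube lives in the main process (variable names are hypothetical):
//
//     // Main process, once the region has been mapped into the guest:
//     let slot: u32 = mapping_arena_slot;
//     lazy_init_tube.send(&slot)?;
//
// `Pmem::activate` below blocks on the matching `tube.recv::<u32>()`.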

pub struct Pmem {
    worker_thread: Option<WorkerThread<(Queue, Tube)>>,
    features: u64,
    disk_image: Option<File>,
    mapping_address: GuestAddress,
    mem_slot: MemSlotConfig,
    mapping_size: u64,
    pmem_device_tube: Option<Tube>,
    swap_interval: Option<Duration>,
}

#[derive(serde::Serialize, serde::Deserialize)]
struct PmemSnapshot {
    mapping_address: GuestAddress,
    mapping_size: u64,
}

/// Configuration of a virtio-pmem device.
pub struct PmemConfig {
    /// Disk image exposed to the guest.
    /// If the memory region is not backed by a file, this should be `None`.
    pub disk_image: Option<File>,
    /// Guest physical address where the memory will be mapped.
    pub mapping_address: GuestAddress,
    /// How the memory slot backing the device is initialized.
    pub mem_slot: MemSlotConfig,
    /// The size of the mapped region.
    pub mapping_size: u64,
    /// A communication channel to the main process to send memory requests.
    pub pmem_device_tube: Tube,
    /// Interval for periodically swapping out the memory mapping.
    pub swap_interval: Option<Duration>,
    /// Whether the region is writable or not.
    pub mapping_writable: bool,
}
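
// A minimal construction sketch, assuming an already-created control tube and
// a pre-mapped slot (all values are hypothetical):
//
//     let cfg = PmemConfig {
//         disk_image: Some(File::open("/path/to/pmem.img")?),
//         mapping_address: GuestAddress(0x1_0000_0000),
//         mem_slot: MemSlotConfig::MemSlot { idx: 0 },
//         mapping_size: 16 << 20, // 16 MiB
//         pmem_device_tube,
//         swap_interval: None,
//         mapping_writable: true,
//     };
//     let pmem = Pmem::new(base_features, cfg)?;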

impl Pmem {
    pub fn new(base_features: u64, cfg: PmemConfig) -> SysResult<Pmem> {
        if cfg.mapping_size > usize::MAX as u64 {
            return Err(SysError::new(libc::EOVERFLOW));
        }

        let mut avail_features = base_features;
        if cfg.mapping_writable {
            if let MemSlotConfig::LazyInit { .. } = cfg.mem_slot {
                error!("pmem-ext2 must be a read-only device");
                return Err(SysError::new(libc::EINVAL));
            }

            avail_features |= 1 << VIRTIO_PMEM_F_DISCARD;
        }

        Ok(Pmem {
            worker_thread: None,
            features: avail_features,
            disk_image: cfg.disk_image,
            mapping_address: cfg.mapping_address,
            mem_slot: cfg.mem_slot,
            mapping_size: cfg.mapping_size,
            pmem_device_tube: Some(cfg.pmem_device_tube),
            swap_interval: cfg.swap_interval,
        })
    }
}

impl VirtioDevice for Pmem {
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut keep_rds = Vec::new();
        if let Some(disk_image) = &self.disk_image {
            keep_rds.push(disk_image.as_raw_descriptor());
        }

        if let Some(ref pmem_device_tube) = self.pmem_device_tube {
            keep_rds.push(pmem_device_tube.as_raw_descriptor());
        }

        if let MemSlotConfig::LazyInit { tube } = &self.mem_slot {
            keep_rds.push(tube.as_raw_descriptor());
        }

        keep_rds
    }

    fn device_type(&self) -> DeviceType {
        DeviceType::Pmem
    }

    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    fn features(&self) -> u64 {
        self.features
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        let config = virtio_pmem_config {
            start_address: Le64::from(self.mapping_address.offset()),
            size: Le64::from(self.mapping_size),
        };
        copy_config(data, 0, config.as_bytes(), offset);
    }

    fn activate(
        &mut self,
        _memory: GuestMemory,
        _interrupt: Interrupt,
        mut queues: BTreeMap<usize, Queue>,
    ) -> anyhow::Result<()> {
        if queues.len() != 1 {
            return Err(anyhow!("expected 1 queue, got {}", queues.len()));
        }

        let mut queue = queues.remove(&0).unwrap();

        // We checked that this fits in a usize in `Pmem::new`.
        let mapping_size = self.mapping_size as usize;

        let pmem_device_tube = self
            .pmem_device_tube
            .take()
            .context("missing pmem device tube")?;

        let swap_interval = self.swap_interval;

        let mapping_arena_slot = match &self.mem_slot {
            MemSlotConfig::MemSlot { idx } => *idx,
            MemSlotConfig::LazyInit { tube } => tube
                .recv::<u32>()
                .context("failed to receive memory slot for ext2 pmem device")?,
        };

        self.worker_thread = Some(WorkerThread::start("v_pmem", move |kill_event| {
            run_worker(
                &mut queue,
                &pmem_device_tube,
                kill_event,
                mapping_arena_slot,
                mapping_size,
                swap_interval,
            );
            (queue, pmem_device_tube)
        }));

        Ok(())
    }

    fn reset(&mut self) -> anyhow::Result<()> {
        if let Some(worker_thread) = self.worker_thread.take() {
            let (_queue, pmem_device_tube) = worker_thread.stop();
            self.pmem_device_tube = Some(pmem_device_tube);
        }
        Ok(())
    }

    fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> {
        if let Some(worker_thread) = self.worker_thread.take() {
            let (queue, pmem_device_tube) = worker_thread.stop();
            self.pmem_device_tube = Some(pmem_device_tube);
            return Ok(Some(BTreeMap::from([(0, queue)])));
        }
        Ok(None)
    }

    fn virtio_wake(
        &mut self,
        queues_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>,
    ) -> anyhow::Result<()> {
        if let Some((mem, interrupt, queues)) = queues_state {
            self.activate(mem, interrupt, queues)?;
        }
        Ok(())
    }

    fn virtio_snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        AnySnapshot::to_any(PmemSnapshot {
            mapping_address: self.mapping_address,
            mapping_size: self.mapping_size,
        })
        .context("failed to serialize pmem snapshot")
    }

    fn virtio_restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        let snapshot: PmemSnapshot =
            AnySnapshot::from_any(data).context("failed to deserialize pmem snapshot")?;
        anyhow::ensure!(
            snapshot.mapping_address == self.mapping_address
                && snapshot.mapping_size == self.mapping_size,
            "pmem snapshot doesn't match config: expected {:?}, got {:?}",
            (self.mapping_address, self.mapping_size),
            (snapshot.mapping_address, snapshot.mapping_size),
        );
        Ok(())
    }
}