// Copyright 2021 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::cell::RefCell;
use std::cmp::{max, min};
use std::io::{self, Write};
use std::mem::size_of;
use std::rc::Rc;
use std::result;
use std::sync::{atomic::AtomicU64, atomic::Ordering, Arc};
use std::thread;
use std::time::Duration;
use std::u32;

use futures::pin_mut;
use futures::stream::{FuturesUnordered, StreamExt};
use remain::sorted;
use thiserror::Error as ThisError;

use base::Error as SysError;
use base::Result as SysResult;
use base::{
    error, info, iov_max, warn, AsRawDescriptor, AsyncTube, Event, RawDescriptor, Timer, Tube,
    TubeError,
};
use cros_async::{
    select5, sync::Mutex as AsyncMutex, AsyncError, EventAsync, Executor, SelectResult, TimerAsync,
};
use data_model::{DataInit, Le16, Le32, Le64};
use disk::{AsyncDisk, ToAsyncDisk};
use vm_control::{DiskControlCommand, DiskControlResult};
use vm_memory::GuestMemory;

use super::{
    copy_config, DescriptorChain, DescriptorError, Interrupt, Queue, Reader, SignalableInterrupt,
    VirtioDevice, Writer, TYPE_BLOCK,
};

const QUEUE_SIZE: u16 = 256;
const NUM_QUEUES: u16 = 16;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES as usize];
const SECTOR_SHIFT: u8 = 9;
const SECTOR_SIZE: u64 = 0x01 << SECTOR_SHIFT;
const MAX_DISCARD_SECTORS: u32 = u32::MAX;
const MAX_WRITE_ZEROES_SECTORS: u32 = u32::MAX;
// Arbitrary limits for number of discard/write zeroes segments.
const MAX_DISCARD_SEG: u32 = 32;
const MAX_WRITE_ZEROES_SEG: u32 = 32;
// Hard-coded to 64 KiB (in 512-byte sectors) for now,
// but this should probably be based on cluster size for qcow.
const DISCARD_SECTOR_ALIGNMENT: u32 = 128;

const ID_LEN: usize = 20;

/// Virtio block device identifier.
/// This is an ASCII string terminated by a \0, unless all 20 bytes are used,
/// in which case the \0 terminator is omitted.
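///
/// Illustrative example (not from the original source): a serial shorter than
/// 20 bytes is padded with trailing NULs.
///
/// ```ignore
/// let mut id: BlockId = [0u8; ID_LEN];
/// id[..6].copy_from_slice(b"serial"); // the remaining 14 bytes stay \0
/// ```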
pub type BlockId = [u8; ID_LEN];

const VIRTIO_BLK_T_IN: u32 = 0;
const VIRTIO_BLK_T_OUT: u32 = 1;
const VIRTIO_BLK_T_FLUSH: u32 = 4;
const VIRTIO_BLK_T_GET_ID: u32 = 8;
const VIRTIO_BLK_T_DISCARD: u32 = 11;
const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13;

const VIRTIO_BLK_S_OK: u8 = 0;
const VIRTIO_BLK_S_IOERR: u8 = 1;
const VIRTIO_BLK_S_UNSUPP: u8 = 2;

const VIRTIO_BLK_F_SEG_MAX: u32 = 2;
const VIRTIO_BLK_F_RO: u32 = 5;
const VIRTIO_BLK_F_BLK_SIZE: u32 = 6;
const VIRTIO_BLK_F_FLUSH: u32 = 9;
const VIRTIO_BLK_F_MQ: u32 = 12;
const VIRTIO_BLK_F_DISCARD: u32 = 13;
const VIRTIO_BLK_F_WRITE_ZEROES: u32 = 14;

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct virtio_blk_geometry {
    cylinders: Le16,
    heads: u8,
    sectors: u8,
}

// Safe because it only has data and has no implicit padding.
unsafe impl DataInit for virtio_blk_geometry {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct virtio_blk_topology {
    physical_block_exp: u8,
    alignment_offset: u8,
    min_io_size: Le16,
    opt_io_size: Le32,
}

// Safe because it only has data and has no implicit padding.
unsafe impl DataInit for virtio_blk_topology {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C, packed)]
pub(crate) struct virtio_blk_config {
    capacity: Le64,
    size_max: Le32,
    seg_max: Le32,
    geometry: virtio_blk_geometry,
    blk_size: Le32,
    topology: virtio_blk_topology,
    writeback: u8,
    unused0: u8,
    pub num_queues: Le16,
    max_discard_sectors: Le32,
    max_discard_seg: Le32,
    discard_sector_alignment: Le32,
    max_write_zeroes_sectors: Le32,
    max_write_zeroes_seg: Le32,
    write_zeroes_may_unmap: u8,
    unused1: [u8; 3],
}

// Safe because it only has data and has no implicit padding.
unsafe impl DataInit for virtio_blk_config {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct virtio_blk_req_header {
    req_type: Le32,
    reserved: Le32,
    sector: Le64,
}

// Safe because it only has data and has no implicit padding.
unsafe impl DataInit for virtio_blk_req_header {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct virtio_blk_discard_write_zeroes {
    sector: Le64,
    num_sectors: Le32,
    flags: Le32,
}

const VIRTIO_BLK_DISCARD_WRITE_ZEROES_FLAG_UNMAP: u32 = 1 << 0;
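
// Per the virtio spec, the unmap flag is only meaningful for write zeroes
// requests, where it hints that the device may deallocate the zeroed range;
// discard requests must have flags == 0. execute_request() below enforces
// this via its valid_flags check.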

// Safe because it only has data and has no implicit padding.
unsafe impl DataInit for virtio_blk_discard_write_zeroes {}

#[sorted]
#[derive(ThisError, Debug)]
enum ExecuteError {
    #[error("failed to copy ID string: {0}")]
    CopyId(io::Error),
    #[error("virtio descriptor error: {0}")]
    Descriptor(DescriptorError),
    #[error("failed to perform discard or write zeroes; sector={sector} num_sectors={num_sectors} flags={flags}; {ioerr:?}")]
    DiscardWriteZeroes {
        ioerr: Option<disk::Error>,
        sector: u64,
        num_sectors: u32,
        flags: u32,
    },
    #[error("failed to flush: {0}")]
    Flush(disk::Error),
    #[error("not enough space in descriptor chain to write status")]
    MissingStatus,
    #[error("out of range")]
    OutOfRange,
    #[error("failed to read message: {0}")]
    Read(io::Error),
    #[error("io error reading {length} bytes from sector {sector}: {desc_error}")]
    ReadIo {
        length: usize,
        sector: u64,
        desc_error: disk::Error,
    },
    #[error("read only; request_type={request_type}")]
    ReadOnly { request_type: u32 },
    #[error("failed to receive command message: {0}")]
    ReceivingCommand(TubeError),
    #[error("failed to send command response: {0}")]
    SendingResponse(TubeError),
    #[error("couldn't reset the timer: {0}")]
    TimerReset(base::Error),
    #[error("unsupported ({0})")]
    Unsupported(u32),
    #[error("io error writing {length} bytes from sector {sector}: {desc_error}")]
    WriteIo {
        length: usize,
        sector: u64,
        desc_error: disk::Error,
    },
    #[error("failed to write request status: {0}")]
    WriteStatus(io::Error),
}

impl ExecuteError {
    fn status(&self) -> u8 {
        match self {
            ExecuteError::CopyId(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::Descriptor(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::DiscardWriteZeroes { .. } => VIRTIO_BLK_S_IOERR,
            ExecuteError::Flush(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::MissingStatus => VIRTIO_BLK_S_IOERR,
            ExecuteError::OutOfRange { .. } => VIRTIO_BLK_S_IOERR,
            ExecuteError::Read(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::ReadIo { .. } => VIRTIO_BLK_S_IOERR,
            ExecuteError::ReadOnly { .. } => VIRTIO_BLK_S_IOERR,
            ExecuteError::ReceivingCommand(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::SendingResponse(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::TimerReset(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::WriteIo { .. } => VIRTIO_BLK_S_IOERR,
            ExecuteError::WriteStatus(_) => VIRTIO_BLK_S_IOERR,
            ExecuteError::Unsupported(_) => VIRTIO_BLK_S_UNSUPP,
        }
    }
}

// Errors that happen in block outside of executing a request.
#[derive(ThisError, Debug)]
enum OtherError {
    #[error("couldn't create an async resample event: {0}")]
    AsyncResampleCreate(AsyncError),
    #[error("couldn't clone the resample event: {0}")]
    CloneResampleEvent(base::Error),
    #[error("couldn't get a value from a timer for flushing: {0}")]
    FlushTimer(AsyncError),
    #[error("failed to fsync the disk: {0}")]
    FsyncDisk(disk::Error),
    #[error("couldn't read the resample event: {0}")]
    ReadResampleEvent(AsyncError),
}

struct DiskState {
    disk_image: Box<dyn AsyncDisk>,
    disk_size: Arc<AtomicU64>,
    read_only: bool,
    sparse: bool,
    id: Option<BlockId>,
}

async fn process_one_request(
    avail_desc: DescriptorChain,
    disk_state: Rc<AsyncMutex<DiskState>>,
    flush_timer: Rc<RefCell<TimerAsync>>,
    flush_timer_armed: Rc<RefCell<bool>>,
    mem: &GuestMemory,
) -> result::Result<usize, ExecuteError> {
    let mut reader =
        Reader::new(mem.clone(), avail_desc.clone()).map_err(ExecuteError::Descriptor)?;
    let mut writer = Writer::new(mem.clone(), avail_desc).map_err(ExecuteError::Descriptor)?;

    // The last byte of the buffer is virtio_blk_req::status.
    // Split it into a separate Writer so that status_writer is the final byte and
    // the original writer is left with just the actual block I/O data.
    let available_bytes = writer.available_bytes();
    let status_offset = available_bytes
        .checked_sub(1)
        .ok_or(ExecuteError::MissingStatus)?;
    let mut status_writer = writer.split_at(status_offset);

    let status = match BlockAsync::execute_request(
        &mut reader,
        &mut writer,
        disk_state,
        flush_timer,
        flush_timer_armed,
    )
    .await
    {
        Ok(()) => VIRTIO_BLK_S_OK,
        Err(e) => {
            error!("failed executing disk request: {}", e);
            e.status()
        }
    };

    status_writer
        .write_all(&[status])
        .map_err(ExecuteError::WriteStatus)?;
    Ok(available_bytes)
}

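// Processes one request from the queue and completes it: writes the status byte,
// puts the descriptor in the used ring, and signals the guest if required.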
async fn process_one_request_task(
    queue: Rc<RefCell<Queue>>,
    avail_desc: DescriptorChain,
    disk_state: Rc<AsyncMutex<DiskState>>,
    mem: GuestMemory,
    interrupt: Rc<RefCell<Interrupt>>,
    flush_timer: Rc<RefCell<TimerAsync>>,
    flush_timer_armed: Rc<RefCell<bool>>,
) {
    let descriptor_index = avail_desc.index;
    let len =
        match process_one_request(avail_desc, disk_state, flush_timer, flush_timer_armed, &mem)
            .await
        {
            Ok(len) => len,
            Err(e) => {
                error!("block: failed to handle request: {}", e);
                0
            }
        };

    let mut queue = queue.borrow_mut();
    queue.add_used(&mem, descriptor_index, len as u32);
    queue.trigger_interrupt(&mem, &*interrupt.borrow());
    queue.update_int_required(&mem);
}

// There is one async task running `handle_queue` per virtio queue in use.
// Receives messages from the guest and queues a task to complete the operations with the async
// executor.
async fn handle_queue(
    ex: &Executor,
    mem: &GuestMemory,
    disk_state: Rc<AsyncMutex<DiskState>>,
    queue: Rc<RefCell<Queue>>,
    evt: EventAsync,
    interrupt: Rc<RefCell<Interrupt>>,
    flush_timer: Rc<RefCell<TimerAsync>>,
    flush_timer_armed: Rc<RefCell<bool>>,
) {
    loop {
        if let Err(e) = evt.next_val().await {
            error!("Failed to read the next queue event: {}", e);
            continue;
        }
        while let Some(descriptor_chain) = queue.borrow_mut().pop(&mem) {
            ex.spawn_local(process_one_request_task(
                Rc::clone(&queue),
                descriptor_chain,
                Rc::clone(&disk_state),
                mem.clone(),
                Rc::clone(&interrupt),
                Rc::clone(&flush_timer),
                Rc::clone(&flush_timer_armed),
            ))
            .detach();
        }
    }
}

async fn handle_irq_resample(
    ex: &Executor,
    interrupt: Rc<RefCell<Interrupt>>,
) -> result::Result<(), OtherError> {
    let resample_evt = if let Some(resample_evt) = interrupt.borrow().get_resample_evt() {
        let resample_evt = resample_evt
            .try_clone()
            .map_err(OtherError::CloneResampleEvent)?;
        let resample_evt =
            EventAsync::new(resample_evt.0, ex).map_err(OtherError::AsyncResampleCreate)?;
        Some(resample_evt)
    } else {
        None
    };
    if let Some(resample_evt) = resample_evt {
        loop {
            let _ = resample_evt
                .next_val()
                .await
                .map_err(OtherError::ReadResampleEvent)?;
            interrupt.borrow().do_interrupt_resample();
        }
    } else {
        // no resample event, park the future.
        let () = futures::future::pending().await;
        Ok(())
    }
}

async fn wait_kill(kill_evt: EventAsync) {
    // Once this event is readable, exit. Exiting this future will cause the main loop to
    // break and the device process to exit.
    let _ = kill_evt.next_val().await;
}

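// Handles control commands (currently just `DiskControlCommand::Resize`) arriving
// on the command tube, replying to each and signaling a config change on success.
// If no tube was provided, parks the future forever.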
async fn handle_command_tube(
    command_tube: &Option<AsyncTube>,
    interrupt: Rc<RefCell<Interrupt>>,
    disk_state: Rc<AsyncMutex<DiskState>>,
) -> Result<(), ExecuteError> {
    let command_tube = match command_tube {
        Some(c) => c,
        None => {
            let () = futures::future::pending().await;
            return Ok(());
        }
    };
    loop {
        match command_tube.next().await {
            Ok(command) => {
                let resp = match command {
                    DiskControlCommand::Resize { new_size } => {
                        resize(Rc::clone(&disk_state), new_size).await
                    }
                };

                command_tube
                    .send(&resp)
                    .map_err(ExecuteError::SendingResponse)?;
                if let DiskControlResult::Ok = resp {
                    interrupt.borrow_mut().signal_config_changed();
                }
            }
            Err(e) => return Err(ExecuteError::ReceivingCommand(e)),
        }
    }
}

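// Shrinks or grows the disk image and publishes the new size to the guest-visible
// config space (picked up by read_config() via the shared `disk_size` atomic).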
async fn resize(disk_state: Rc<AsyncMutex<DiskState>>, new_size: u64) -> DiskControlResult {
    // Acquire exclusive, mutable access to the state so the virtqueue task won't be able to read
    // the state while resizing.
    let mut disk_state = disk_state.lock().await;

    if disk_state.read_only {
        error!("Attempted to resize read-only block device");
        return DiskControlResult::Err(SysError::new(libc::EROFS));
    }

    info!("Resizing block device to {} bytes", new_size);

    if let Err(e) = disk_state.disk_image.set_len(new_size) {
        error!("Resizing disk failed! {}", e);
        return DiskControlResult::Err(SysError::new(libc::EIO));
    }

    // Allocate new space if the disk image is not sparse.
    if let Err(e) = disk_state.disk_image.allocate(0, new_size) {
        error!("Allocating disk space after resize failed! {}", e);
        return DiskControlResult::Err(SysError::new(libc::EIO));
    }

    disk_state.sparse = false;

    if let Ok(new_disk_size) = disk_state.disk_image.get_len() {
        disk_state.disk_size.store(new_disk_size, Ordering::Release);
    }
    DiskControlResult::Ok
}

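// Periodically flushes the disk when the flush timer expires. The timer is armed by
// write requests in `execute_request` and disarmed here before each fsync.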
async fn flush_disk(
    disk_state: Rc<AsyncMutex<DiskState>>,
    timer: TimerAsync,
    armed: Rc<RefCell<bool>>,
) -> Result<(), OtherError> {
    loop {
        timer.next_val().await.map_err(OtherError::FlushTimer)?;
        if !*armed.borrow() {
            continue;
        }

        // Reset armed before calling fsync to guarantee that IO requests that started after we call
        // fsync will be committed eventually.
        *armed.borrow_mut() = false;

        disk_state
            .read_lock()
            .await
            .disk_image
            .fsync()
            .await
            .map_err(OtherError::FsyncDisk)?;
    }
}

// The main worker thread. Initializes the asynchronous worker tasks and passes them to the
// executor to be processed.
//
// `disk_state` is wrapped by `AsyncMutex`, which provides both shared and exclusive locks,
// because the state can be read from the virtqueue task while the control task is processing
// a resizing command.
fn run_worker(
    ex: Executor,
    interrupt: Interrupt,
    queues: Vec<Queue>,
    mem: GuestMemory,
    disk_state: &Rc<AsyncMutex<DiskState>>,
    control_tube: &Option<AsyncTube>,
    queue_evts: Vec<Event>,
    kill_evt: Event,
) -> Result<(), String> {
    // Wrap the interrupt in a `RefCell` so it can be shared between async functions.
    let interrupt = Rc::new(RefCell::new(interrupt));

    // One flush timer per disk.
    let timer = Timer::new().expect("Failed to create a timer");
    let flush_timer_armed = Rc::new(RefCell::new(false));

    // Handle all the queues in one sub-select call.
    let flush_timer = Rc::new(RefCell::new(
        TimerAsync::new(
            // Call try_clone() to share the same underlying FD with the `flush_disk` task.
            timer.0.try_clone().expect("Failed to clone flush_timer"),
            &ex,
        )
        .expect("Failed to create an async timer"),
    ));
    let queue_handlers =
        queues
            .into_iter()
            .map(|q| Rc::new(RefCell::new(q)))
            .zip(queue_evts.into_iter().map(|e| {
                EventAsync::new(e.0, &ex).expect("Failed to create async event for queue")
            }))
            .map(|(queue, event)| {
                // alias some refs so the lifetimes work.
                let mem = &mem;
                let disk_state = &disk_state;
                let interrupt = &interrupt;
                handle_queue(
                    &ex,
                    mem,
                    Rc::clone(&disk_state),
                    Rc::clone(&queue),
                    event,
                    interrupt.clone(),
                    Rc::clone(&flush_timer),
                    Rc::clone(&flush_timer_armed),
                )
            })
            .collect::<FuturesUnordered<_>>()
            .into_future();

    // Flushes the disk periodically.
    let flush_timer = TimerAsync::new(timer.0, &ex).expect("Failed to create an async timer");
    let disk_flush = flush_disk(disk_state.clone(), flush_timer, flush_timer_armed.clone());
    pin_mut!(disk_flush);

    // Handles control requests.
    let control = handle_command_tube(control_tube, interrupt.clone(), disk_state.clone());
    pin_mut!(control);

    // Process any requests to resample the irq value.
    let resample = handle_irq_resample(&ex, interrupt.clone());
    pin_mut!(resample);

    // Exit if the kill event is triggered.
    let kill_evt = EventAsync::new(kill_evt.0, &ex).expect("Failed to create async kill event fd");
    let kill = wait_kill(kill_evt);
    pin_mut!(kill);

    match ex.run_until(select5(queue_handlers, disk_flush, control, resample, kill)) {
        Ok((_, flush_res, control_res, resample_res, _)) => {
            if let SelectResult::Finished(Err(e)) = flush_res {
                return Err(format!("failed to flush a disk: {}", e));
            }
            if let SelectResult::Finished(Err(e)) = control_res {
                return Err(format!("failed to handle a control request: {}", e));
            }
            if let SelectResult::Finished(Err(e)) = resample_res {
                return Err(format!("failed to resample an irq value: {:?}", e));
            }
            Ok(())
        }
        Err(e) => Err(e.to_string()),
    }
}

/// Virtio device for exposing block level read/write operations on a host file.
pub struct BlockAsync {
    kill_evt: Option<Event>,
    worker_thread: Option<thread::JoinHandle<(Box<dyn ToAsyncDisk>, Option<Tube>)>>,
    disk_image: Option<Box<dyn ToAsyncDisk>>,
    disk_size: Arc<AtomicU64>,
    avail_features: u64,
    read_only: bool,
    sparse: bool,
    seg_max: u32,
    block_size: u32,
    id: Option<BlockId>,
    control_tube: Option<Tube>,
}

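// Builds the virtio_blk_config structure exposed to the guest through read_config().
// For example, a 4096-byte disk with 512-byte sectors reports capacity = 8 sectors
// (4096 >> SECTOR_SHIFT), which is exactly what the read_size test below verifies.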
fn build_config_space(disk_size: u64, seg_max: u32, block_size: u32) -> virtio_blk_config {
    virtio_blk_config {
        // If the image is not a multiple of the sector size, the tail bits are not exposed.
        capacity: Le64::from(disk_size >> SECTOR_SHIFT),
        seg_max: Le32::from(seg_max),
        blk_size: Le32::from(block_size),
        num_queues: Le16::from(NUM_QUEUES),
        max_discard_sectors: Le32::from(MAX_DISCARD_SECTORS),
        discard_sector_alignment: Le32::from(DISCARD_SECTOR_ALIGNMENT),
        max_write_zeroes_sectors: Le32::from(MAX_WRITE_ZEROES_SECTORS),
        write_zeroes_may_unmap: 1,
        max_discard_seg: Le32::from(MAX_DISCARD_SEG),
        max_write_zeroes_seg: Le32::from(MAX_WRITE_ZEROES_SEG),
        ..Default::default()
    }
}

impl BlockAsync {
    /// Create a new virtio block device that operates on the given AsyncDisk.
    pub fn new(
        base_features: u64,
        disk_image: Box<dyn ToAsyncDisk>,
        read_only: bool,
        sparse: bool,
        block_size: u32,
        id: Option<BlockId>,
        control_tube: Option<Tube>,
    ) -> SysResult<BlockAsync> {
        if block_size % SECTOR_SIZE as u32 != 0 {
            error!(
                "Block size {} is not a multiple of {}.",
                block_size, SECTOR_SIZE,
            );
            return Err(SysError::new(libc::EINVAL));
        }
        let disk_size = disk_image.get_len()?;
        if disk_size % block_size as u64 != 0 {
            warn!(
                "Disk size {} is not a multiple of block size {}; \
                 the remainder will not be visible to the guest.",
                disk_size, block_size,
            );
        }

        let mut avail_features: u64 = base_features;
        avail_features |= 1 << VIRTIO_BLK_F_FLUSH;
        if read_only {
            avail_features |= 1 << VIRTIO_BLK_F_RO;
        } else {
            if sparse {
                avail_features |= 1 << VIRTIO_BLK_F_DISCARD;
            }
            avail_features |= 1 << VIRTIO_BLK_F_WRITE_ZEROES;
        }
        avail_features |= 1 << VIRTIO_BLK_F_SEG_MAX;
        avail_features |= 1 << VIRTIO_BLK_F_BLK_SIZE;
        avail_features |= 1 << VIRTIO_BLK_F_MQ;

        let seg_max = min(max(iov_max(), 1), u32::max_value() as usize) as u32;

        // Since we do not currently support indirect descriptors, the maximum
        // number of segments must be smaller than the queue size.
        // In addition, the request header and status each consume a descriptor.
        let seg_max = min(seg_max, u32::from(QUEUE_SIZE) - 2);
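        // With QUEUE_SIZE = 256 this caps seg_max at 254.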

        Ok(BlockAsync {
            kill_evt: None,
            worker_thread: None,
            disk_image: Some(disk_image),
            disk_size: Arc::new(AtomicU64::new(disk_size)),
            avail_features,
            read_only,
            sparse,
            seg_max,
            block_size,
            id,
            control_tube,
        })
    }

    // Execute a single block device request.
    // `writer` includes the data region only; the status byte is not included.
    // It is up to the caller to convert the result of this function into a status byte
    // and write it to the expected location in guest memory.
    async fn execute_request(
        reader: &mut Reader,
        writer: &mut Writer,
        disk_state: Rc<AsyncMutex<DiskState>>,
        flush_timer: Rc<RefCell<TimerAsync>>,
        flush_timer_armed: Rc<RefCell<bool>>,
    ) -> result::Result<(), ExecuteError> {
        // Acquire immutable access to disk_state to prevent the disk from being resized.
        let disk_state = disk_state.read_lock().await;

        let req_header: virtio_blk_req_header = reader.read_obj().map_err(ExecuteError::Read)?;

        let req_type = req_header.req_type.to_native();
        let sector = req_header.sector.to_native();

        if disk_state.read_only && req_type != VIRTIO_BLK_T_IN && req_type != VIRTIO_BLK_T_GET_ID {
            return Err(ExecuteError::ReadOnly {
                request_type: req_type,
            });
        }

        /// Check that a request accesses only data within the disk's current size.
        /// All parameters are in units of bytes.
        fn check_range(
            io_start: u64,
            io_length: u64,
            disk_size: u64,
        ) -> result::Result<(), ExecuteError> {
            let io_end = io_start
                .checked_add(io_length)
                .ok_or(ExecuteError::OutOfRange)?;
            if io_end > disk_size {
                Err(ExecuteError::OutOfRange)
            } else {
                Ok(())
            }
        }
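
        // For example: on a 4096-byte disk, a 512-byte access starting at offset 4096
        // computes io_end = 4608 > 4096 and is rejected as OutOfRange (this is what
        // the read_beyond_last_sector test exercises).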

        let disk_size = disk_state.disk_size.load(Ordering::Relaxed);
        match req_type {
            VIRTIO_BLK_T_IN => {
                let data_len = writer.available_bytes();
                if data_len == 0 {
                    return Ok(());
                }
                let offset = sector
                    .checked_shl(u32::from(SECTOR_SHIFT))
                    .ok_or(ExecuteError::OutOfRange)?;
                check_range(offset, data_len as u64, disk_size)?;
                let disk_image = &disk_state.disk_image;
                writer
                    .write_all_from_at_fut(&**disk_image, data_len, offset)
                    .await
                    .map_err(|desc_error| ExecuteError::ReadIo {
                        length: data_len,
                        sector,
                        desc_error,
                    })?;
            }
            VIRTIO_BLK_T_OUT => {
                let data_len = reader.available_bytes();
                if data_len == 0 {
                    return Ok(());
                }
                let offset = sector
                    .checked_shl(u32::from(SECTOR_SHIFT))
                    .ok_or(ExecuteError::OutOfRange)?;
                check_range(offset, data_len as u64, disk_size)?;
                let disk_image = &disk_state.disk_image;
                reader
                    .read_exact_to_at_fut(&**disk_image, data_len, offset)
                    .await
                    .map_err(|desc_error| ExecuteError::WriteIo {
                        length: data_len,
                        sector,
                        desc_error,
                    })?;

                if !*flush_timer_armed.borrow() {
                    *flush_timer_armed.borrow_mut() = true;

                    let flush_delay = Duration::from_secs(60);
                    flush_timer
                        .borrow_mut()
                        .reset(flush_delay, None)
                        .map_err(ExecuteError::TimerReset)?;
                }
            }
            VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_WRITE_ZEROES => {
                if req_type == VIRTIO_BLK_T_DISCARD && !disk_state.sparse {
                    // Discard is a hint; if this is a non-sparse disk, just ignore it.
                    return Ok(());
                }

                while reader.available_bytes() >= size_of::<virtio_blk_discard_write_zeroes>() {
                    let seg: virtio_blk_discard_write_zeroes =
                        reader.read_obj().map_err(ExecuteError::Read)?;

                    let sector = seg.sector.to_native();
                    let num_sectors = seg.num_sectors.to_native();
                    let flags = seg.flags.to_native();

                    let valid_flags = if req_type == VIRTIO_BLK_T_WRITE_ZEROES {
                        VIRTIO_BLK_DISCARD_WRITE_ZEROES_FLAG_UNMAP
                    } else {
                        0
                    };

                    if (flags & !valid_flags) != 0 {
                        return Err(ExecuteError::DiscardWriteZeroes {
                            ioerr: None,
                            sector,
                            num_sectors,
                            flags,
                        });
                    }

                    let offset = sector
                        .checked_shl(u32::from(SECTOR_SHIFT))
                        .ok_or(ExecuteError::OutOfRange)?;
                    let length = u64::from(num_sectors)
                        .checked_shl(u32::from(SECTOR_SHIFT))
                        .ok_or(ExecuteError::OutOfRange)?;
                    check_range(offset, length, disk_size)?;

                    if req_type == VIRTIO_BLK_T_DISCARD {
                        // Since Discard is just a hint and some filesystems may not implement
                        // FALLOC_FL_PUNCH_HOLE, ignore punch_hole errors.
                        let _ = disk_state.disk_image.punch_hole(offset, length).await;
                    } else {
                        disk_state
                            .disk_image
                            .write_zeroes_at(offset, length)
                            .await
                            .map_err(|e| ExecuteError::DiscardWriteZeroes {
                                ioerr: Some(e),
                                sector,
                                num_sectors,
                                flags,
                            })?;
                    }
                }
            }
            VIRTIO_BLK_T_FLUSH => {
                disk_state
                    .disk_image
                    .fsync()
                    .await
                    .map_err(ExecuteError::Flush)?;
            }
            VIRTIO_BLK_T_GET_ID => {
                if let Some(id) = disk_state.id {
                    writer.write_all(&id).map_err(ExecuteError::CopyId)?;
                } else {
                    return Err(ExecuteError::Unsupported(req_type));
                }
            }
            t => return Err(ExecuteError::Unsupported(t)),
        };
        Ok(())
    }
}

impl Drop for BlockAsync {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }

        if let Some(worker_thread) = self.worker_thread.take() {
            let _ = worker_thread.join();
        }
    }
}

impl VirtioDevice for BlockAsync {
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut keep_rds = Vec::new();

        if let Some(disk_image) = &self.disk_image {
            keep_rds.extend(disk_image.as_raw_descriptors());
        }

        if let Some(control_tube) = &self.control_tube {
            keep_rds.push(control_tube.as_raw_descriptor());
        }

        keep_rds
    }

    fn features(&self) -> u64 {
        self.avail_features
    }

    fn device_type(&self) -> u32 {
        TYPE_BLOCK
    }

    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        let config_space = {
            let disk_size = self.disk_size.load(Ordering::Acquire);
            build_config_space(disk_size, self.seg_max, self.block_size)
        };
        copy_config(data, 0, config_space.as_slice(), offset);
    }

    fn activate(
        &mut self,
        mem: GuestMemory,
        interrupt: Interrupt,
        queues: Vec<Queue>,
        queue_evts: Vec<Event>,
    ) {
        let (self_kill_evt, kill_evt) = match Event::new().and_then(|e| Ok((e.try_clone()?, e))) {
            Ok(v) => v,
            Err(e) => {
                error!("failed creating kill Event pair: {}", e);
                return;
            }
        };
        self.kill_evt = Some(self_kill_evt);

        let read_only = self.read_only;
        let sparse = self.sparse;
        let disk_size = self.disk_size.clone();
        let id = self.id.take();
        if let Some(disk_image) = self.disk_image.take() {
            let control_tube = self.control_tube.take();
            let worker_result =
                thread::Builder::new()
                    .name("virtio_blk".to_string())
                    .spawn(move || {
                        let ex = Executor::new().expect("Failed to create an executor");
                        let async_control = control_tube
                            .map(|c| c.into_async_tube(&ex).expect("failed to create async tube"));
                        let async_image = match disk_image.to_async_disk(&ex) {
                            Ok(d) => d,
                            Err(e) => panic!("Failed to create async disk {}", e),
                        };
                        let disk_state = Rc::new(AsyncMutex::new(DiskState {
                            disk_image: async_image,
                            disk_size,
                            read_only,
                            sparse,
                            id,
                        }));
                        if let Err(err_string) = run_worker(
                            ex,
                            interrupt,
                            queues,
                            mem,
                            &disk_state,
                            &async_control,
                            queue_evts,
                            kill_evt,
                        ) {
                            error!("{}", err_string);
                        }

                        let disk_state = match Rc::try_unwrap(disk_state) {
                            Ok(d) => d.into_inner(),
                            Err(_) => panic!("too many refs to the disk"),
                        };
                        (
                            disk_state.disk_image.into_inner(),
                            async_control.map(|c| c.into()),
                        )
                    });

            match worker_result {
                Err(e) => {
                    error!("failed to spawn virtio_blk worker: {}", e);
                    return;
                }
                Ok(join_handle) => {
                    self.worker_thread = Some(join_handle);
                }
            }
        }
    }

    fn reset(&mut self) -> bool {
        if let Some(kill_evt) = self.kill_evt.take() {
            if kill_evt.write(1).is_err() {
                error!("{}: failed to notify the kill event", self.debug_label());
                return false;
            }
        }

        if let Some(worker_thread) = self.worker_thread.take() {
            match worker_thread.join() {
                Err(_) => {
                    error!("{}: failed to get back resources", self.debug_label());
                    return false;
                }
                Ok((disk_image, control_tube)) => {
                    self.disk_image = Some(disk_image);
                    self.control_tube = control_tube;
                    return true;
                }
            }
        }
        false
    }
}

#[cfg(test)]
mod tests {
    use std::fs::{File, OpenOptions};
    use std::mem::size_of_val;
    use std::sync::atomic::AtomicU64;

    use disk::SingleFileDisk;
    use tempfile::TempDir;
    use vm_memory::GuestAddress;

    use crate::virtio::base_features;
    use crate::virtio::descriptor_utils::{create_descriptor_chain, DescriptorType};
    use crate::ProtectionType;

    use super::*;

    #[test]
    fn read_size() {
        let tempdir = TempDir::new().unwrap();
        let mut path = tempdir.path().to_owned();
        path.push("disk_image");
        let f = File::create(&path).unwrap();
        f.set_len(0x1000).unwrap();

        let features = base_features(ProtectionType::Unprotected);
        let b = BlockAsync::new(features, Box::new(f), true, false, 512, None, None).unwrap();
        let mut num_sectors = [0u8; 4];
        b.read_config(0, &mut num_sectors);
        // size is 0x1000, so num_sectors is 8 (4096/512).
        assert_eq!([0x08, 0x00, 0x00, 0x00], num_sectors);
        let mut msw_sectors = [0u8; 4];
        b.read_config(4, &mut msw_sectors);
        // size is 0x1000, so msw_sectors is 0.
        assert_eq!([0x00, 0x00, 0x00, 0x00], msw_sectors);
    }

    #[test]
    fn read_block_size() {
        let tempdir = TempDir::new().unwrap();
        let mut path = tempdir.path().to_owned();
        path.push("disk_image");
        let f = File::create(&path).unwrap();
        f.set_len(0x1000).unwrap();

        let features = base_features(ProtectionType::Unprotected);
        let b = BlockAsync::new(features, Box::new(f), true, false, 4096, None, None).unwrap();
        let mut blk_size = [0u8; 4];
        b.read_config(20, &mut blk_size);
        // blk_size should be 4096 (0x1000).
        assert_eq!([0x00, 0x10, 0x00, 0x00], blk_size);
    }

    #[test]
    fn read_features() {
        let tempdir = TempDir::new().unwrap();
        let mut path = tempdir.path().to_owned();
        path.push("disk_image");

        // read-write block device
        {
            let f = File::create(&path).unwrap();
            let features = base_features(ProtectionType::Unprotected);
            let b = BlockAsync::new(features, Box::new(f), false, true, 512, None, None).unwrap();
            // writable device should set VIRTIO_BLK_F_FLUSH + VIRTIO_BLK_F_DISCARD
            // + VIRTIO_BLK_F_WRITE_ZEROES + VIRTIO_F_VERSION_1 + VIRTIO_BLK_F_BLK_SIZE
            // + VIRTIO_BLK_F_SEG_MAX + VIRTIO_BLK_F_MQ
            assert_eq!(0x100007244, b.features());
        }

        // read-write block device, non-sparse
        {
            let f = File::create(&path).unwrap();
            let features = base_features(ProtectionType::Unprotected);
            let b = BlockAsync::new(features, Box::new(f), false, false, 512, None, None).unwrap();
            // non-sparse writable device should set VIRTIO_BLK_F_FLUSH
            // + VIRTIO_BLK_F_WRITE_ZEROES + VIRTIO_F_VERSION_1 + VIRTIO_BLK_F_BLK_SIZE
            // + VIRTIO_BLK_F_SEG_MAX + VIRTIO_BLK_F_MQ, but not VIRTIO_BLK_F_DISCARD
            assert_eq!(0x100005244, b.features());
        }

        // read-only block device
        {
            let f = File::create(&path).unwrap();
            let features = base_features(ProtectionType::Unprotected);
            let b = BlockAsync::new(features, Box::new(f), true, true, 512, None, None).unwrap();
            // read-only device should set VIRTIO_BLK_F_FLUSH and VIRTIO_BLK_F_RO
            // + VIRTIO_F_VERSION_1 + VIRTIO_BLK_F_BLK_SIZE + VIRTIO_BLK_F_SEG_MAX
            // + VIRTIO_BLK_F_MQ
            assert_eq!(0x100001264, b.features());
        }
    }

    #[test]
    fn read_last_sector() {
        let ex = Executor::new().expect("creating an executor failed");

        let tempdir = TempDir::new().unwrap();
        let mut path = tempdir.path().to_owned();
        path.push("disk_image");
        let f = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .open(&path)
            .unwrap();
        let disk_size = 0x1000;
        f.set_len(disk_size).unwrap();
        let af = SingleFileDisk::new(f, &ex).expect("Failed to create SFD");

        let mem = Rc::new(
            GuestMemory::new(&[(GuestAddress(0u64), 4 * 1024 * 1024)])
                .expect("Creating guest memory failed."),
        );

        let req_hdr = virtio_blk_req_header {
            req_type: Le32::from(VIRTIO_BLK_T_IN),
            reserved: Le32::from(0),
            sector: Le64::from(7), // Disk is 8 sectors long, so this is the last valid sector.
        };
        mem.write_obj_at_addr(req_hdr, GuestAddress(0x1000))
            .expect("writing req failed");

        let avail_desc = create_descriptor_chain(
            &mem,
            GuestAddress(0x100),  // Place descriptor chain at 0x100.
            GuestAddress(0x1000), // Describe buffer at 0x1000.
            vec![
                // Request header
                (DescriptorType::Readable, size_of_val(&req_hdr) as u32),
                // I/O buffer (1 sector of data)
                (DescriptorType::Writable, 512),
                // Request status
                (DescriptorType::Writable, 1),
            ],
            0,
        )
        .expect("create_descriptor_chain failed");

        let timer = Timer::new().expect("Failed to create a timer");
        let flush_timer = Rc::new(RefCell::new(
            TimerAsync::new(timer.0, &ex).expect("Failed to create an async timer"),
        ));
        let flush_timer_armed = Rc::new(RefCell::new(false));

        let disk_state = Rc::new(AsyncMutex::new(DiskState {
            disk_image: Box::new(af),
            disk_size: Arc::new(AtomicU64::new(disk_size)),
            read_only: false,
            sparse: true,
            id: None,
        }));

        let fut = process_one_request(avail_desc, disk_state, flush_timer, flush_timer_armed, &mem);

        ex.run_until(fut)
            .expect("running executor failed")
            .expect("execute failed");

        let status_offset = GuestAddress((0x1000 + size_of_val(&req_hdr) + 512) as u64);
        let status = mem.read_obj_from_addr::<u8>(status_offset).unwrap();
        assert_eq!(status, VIRTIO_BLK_S_OK);
    }

    #[test]
    fn read_beyond_last_sector() {
        let tempdir = TempDir::new().unwrap();
        let mut path = tempdir.path().to_owned();
        path.push("disk_image");
        let f = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .open(&path)
            .unwrap();
        let disk_size = 0x1000;
        f.set_len(disk_size).unwrap();
        let mem = Rc::new(
            GuestMemory::new(&[(GuestAddress(0u64), 4 * 1024 * 1024)])
                .expect("Creating guest memory failed."),
        );

        let req_hdr = virtio_blk_req_header {
            req_type: Le32::from(VIRTIO_BLK_T_IN),
            reserved: Le32::from(0),
            sector: Le64::from(7), // Disk is 8 sectors long, so this is the last valid sector.
        };
        mem.write_obj_at_addr(req_hdr, GuestAddress(0x1000))
            .expect("writing req failed");

        let avail_desc = create_descriptor_chain(
            &mem,
            GuestAddress(0x100),  // Place descriptor chain at 0x100.
            GuestAddress(0x1000), // Describe buffer at 0x1000.
            vec![
                // Request header
                (DescriptorType::Readable, size_of_val(&req_hdr) as u32),
                // I/O buffer (2 sectors of data - overlap the end of the disk).
                (DescriptorType::Writable, 512 * 2),
                // Request status
                (DescriptorType::Writable, 1),
            ],
            0,
        )
        .expect("create_descriptor_chain failed");

        let ex = Executor::new().expect("creating an executor failed");

        let af = SingleFileDisk::new(f, &ex).expect("Failed to create SFD");
        let timer = Timer::new().expect("Failed to create a timer");
        let flush_timer = Rc::new(RefCell::new(
            TimerAsync::new(timer.0, &ex).expect("Failed to create an async timer"),
        ));
        let flush_timer_armed = Rc::new(RefCell::new(false));
        let disk_state = Rc::new(AsyncMutex::new(DiskState {
            disk_image: Box::new(af),
            disk_size: Arc::new(AtomicU64::new(disk_size)),
            read_only: false,
            sparse: true,
            id: None,
        }));

        let fut = process_one_request(avail_desc, disk_state, flush_timer, flush_timer_armed, &mem);

        ex.run_until(fut)
            .expect("running executor failed")
            .expect("execute failed");

        let status_offset = GuestAddress((0x1000 + size_of_val(&req_hdr) + 512 * 2) as u64);
        let status = mem.read_obj_from_addr::<u8>(status_offset).unwrap();
        assert_eq!(status, VIRTIO_BLK_S_IOERR);
    }

    #[test]
    fn get_id() {
        let ex = Executor::new().expect("creating an executor failed");

        let tempdir = TempDir::new().unwrap();
        let mut path = tempdir.path().to_owned();
        path.push("disk_image");
        let f = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .open(&path)
            .unwrap();
        let disk_size = 0x1000;
        f.set_len(disk_size).unwrap();

        let mem = GuestMemory::new(&[(GuestAddress(0u64), 4 * 1024 * 1024)])
            .expect("Creating guest memory failed.");

        let req_hdr = virtio_blk_req_header {
            req_type: Le32::from(VIRTIO_BLK_T_GET_ID),
            reserved: Le32::from(0),
            sector: Le64::from(0),
        };
        mem.write_obj_at_addr(req_hdr, GuestAddress(0x1000))
            .expect("writing req failed");

        let avail_desc = create_descriptor_chain(
            &mem,
            GuestAddress(0x100),  // Place descriptor chain at 0x100.
            GuestAddress(0x1000), // Describe buffer at 0x1000.
            vec![
                // Request header
                (DescriptorType::Readable, size_of_val(&req_hdr) as u32),
                // I/O buffer (20 bytes for serial)
                (DescriptorType::Writable, 20),
                // Request status
                (DescriptorType::Writable, 1),
            ],
            0,
        )
        .expect("create_descriptor_chain failed");

        let af = SingleFileDisk::new(f, &ex).expect("Failed to create SFD");
        let timer = Timer::new().expect("Failed to create a timer");
        let flush_timer = Rc::new(RefCell::new(
            TimerAsync::new(timer.0, &ex).expect("Failed to create an async timer"),
        ));
        let flush_timer_armed = Rc::new(RefCell::new(false));

        let id = b"a20-byteserialnumber";

        let disk_state = Rc::new(AsyncMutex::new(DiskState {
            disk_image: Box::new(af),
            disk_size: Arc::new(AtomicU64::new(disk_size)),
            read_only: false,
            sparse: true,
            id: Some(*id),
        }));

        let fut = process_one_request(avail_desc, disk_state, flush_timer, flush_timer_armed, &mem);

        ex.run_until(fut)
            .expect("running executor failed")
            .expect("execute failed");

        // The status byte follows the 16-byte request header and the 20-byte ID buffer.
        let status_offset = GuestAddress((0x1000 + size_of_val(&req_hdr) + 20) as u64);
        let status = mem.read_obj_from_addr::<u8>(status_offset).unwrap();
        assert_eq!(status, VIRTIO_BLK_S_OK);

        let id_offset = GuestAddress(0x1000 + size_of_val(&req_hdr) as u64);
        let returned_id = mem.read_obj_from_addr::<[u8; 20]>(id_offset).unwrap();
        assert_eq!(returned_id, *id);
    }
}