1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! VM disk image file format I/O.
6
7 use std::cmp::min;
8 use std::fmt::Debug;
9 use std::fs::File;
10 use std::io;
11 use std::io::Seek;
12 use std::io::SeekFrom;
13 use std::path::Path;
14 use std::sync::Arc;
15
16 use async_trait::async_trait;
17 use base::get_filesystem_type;
18 use base::info;
19 use base::AsRawDescriptors;
20 use base::FileAllocate;
21 use base::FileReadWriteAtVolatile;
22 use base::FileSetLen;
23 use cros_async::BackingMemory;
24 use cros_async::Executor;
25 use cros_async::IoSource;
26 use cros_async::MemRegionIter;
27 use thiserror::Error as ThisError;
28
29 mod asynchronous;
30 #[allow(unused)]
31 pub(crate) use asynchronous::AsyncDiskFileWrapper;
32 #[cfg(feature = "qcow")]
33 mod qcow;
34 #[cfg(feature = "qcow")]
35 pub use qcow::QcowFile;
36 #[cfg(feature = "qcow")]
37 pub use qcow::QCOW_MAGIC;
38 mod sys;
39
40 #[cfg(feature = "composite-disk")]
41 mod composite;
42 #[cfg(feature = "composite-disk")]
43 use composite::CompositeDiskFile;
44 #[cfg(feature = "composite-disk")]
45 use composite::CDISK_MAGIC;
46 #[cfg(feature = "composite-disk")]
47 mod gpt;
48 #[cfg(feature = "composite-disk")]
49 pub use composite::create_composite_disk;
50 #[cfg(feature = "composite-disk")]
51 pub use composite::create_zero_filler;
52 #[cfg(feature = "composite-disk")]
53 pub use composite::Error as CompositeError;
54 #[cfg(feature = "composite-disk")]
55 pub use composite::ImagePartitionType;
56 #[cfg(feature = "composite-disk")]
57 pub use composite::PartitionInfo;
58 #[cfg(feature = "composite-disk")]
59 pub use gpt::Error as GptError;
60
61 #[cfg(feature = "android-sparse")]
62 mod android_sparse;
63 #[cfg(feature = "android-sparse")]
64 use android_sparse::AndroidSparse;
65 #[cfg(feature = "android-sparse")]
66 use android_sparse::SPARSE_HEADER_MAGIC;
67 use sys::read_from_disk;
68
/// Nesting depth limit for disk formats that can open other disk files.
///
/// NOTE(review): presumably this is the value callers pass as the `max_nesting_depth` argument
/// of [`create_disk_file`]; confirm against the call sites.
pub const MAX_NESTING_DEPTH: u32 = 10;
71
/// Errors reported by the disk image I/O code in this module.
///
/// Several operations appear twice: once wrapping a `cros_async::AsyncError` and once, with an
/// `Io` prefix, wrapping a plain `io::Error`. The display messages of such pairs are identical;
/// only the wrapped source error type differs.
#[derive(ThisError, Debug)]
pub enum Error {
    #[error("failed to create block device: {0}")]
    BlockDeviceNew(base::Error),
    #[error("requested file conversion not supported")]
    ConversionNotSupported,
    /// Opening an Android sparse image failed.
    #[cfg(feature = "android-sparse")]
    #[error("failure in android sparse disk: {0}")]
    CreateAndroidSparseDisk(android_sparse::Error),
    /// Opening a composite disk image failed.
    #[cfg(feature = "composite-disk")]
    #[error("failure in composite disk: {0}")]
    CreateCompositeDisk(composite::Error),
    #[error("failure creating single file disk: {0}")]
    CreateSingleFileDisk(cros_async::AsyncError),
    /// Asynchronous `fdatasync` failed.
    #[error("failure with fdatasync: {0}")]
    Fdatasync(cros_async::AsyncError),
    /// Asynchronous `fsync` failed.
    #[error("failure with fsync: {0}")]
    Fsync(cros_async::AsyncError),
    #[error("failure with fdatasync: {0}")]
    IoFdatasync(io::Error),
    #[error("failure with flush: {0}")]
    IoFlush(io::Error),
    #[error("failure with fsync: {0}")]
    IoFsync(io::Error),
    #[error("failure to punch hole: {0}")]
    IoPunchHole(io::Error),
    /// Querying the file system type of the host file backing the image failed.
    #[error("checking host fs type: {0}")]
    HostFsType(base::Error),
    /// A disk file was requested with no nesting levels left.
    #[error("maximum disk nesting depth exceeded")]
    MaxNestingDepthExceeded,
    /// Asynchronous hole punching on a regular file failed.
    #[error("failure to punch hole: {0}")]
    PunchHole(cros_async::AsyncError),
    /// Discarding a range of a block device file failed.
    #[error("failure to punch hole for block device file: {0}")]
    PunchHoleBlockDeviceFile(base::Error),
    /// Opening a qcow image failed.
    #[cfg(feature = "qcow")]
    #[error("failure in qcow: {0}")]
    QcowError(qcow::Error),
    #[error("failed to read data: {0}")]
    ReadingData(io::Error),
    #[error("failed to read header: {0}")]
    ReadingHeader(io::Error),
    /// Asynchronous read from the file into backing memory failed.
    #[error("failed to read to memory: {0}")]
    ReadToMem(cros_async::AsyncError),
    /// Seeking, or querying the length or position of, the image file failed.
    #[error("failed to seek file: {0}")]
    SeekingFile(io::Error),
    #[error("failed to set file size: {0}")]
    SettingFileSize(io::Error),
    /// The requested image type is not supported by the features compiled into this build.
    #[error("unknown disk type")]
    UnknownType,
    /// Asynchronous write from backing memory to the file failed.
    #[error("failed to write from memory: {0}")]
    WriteFromMem(cros_async::AsyncError),
    /// Asynchronous write of an owned buffer to the file failed.
    #[error("failed to write from vec: {0}")]
    WriteFromVec(cros_async::AsyncError),
    #[error("failed to write zeroes: {0}")]
    WriteZeroes(io::Error),
    #[error("failed to write data: {0}")]
    WritingData(io::Error),
    #[error("failed to convert to async: {0}")]
    ToAsync(cros_async::AsyncError),
    #[cfg(windows)]
    #[error("failed to set disk file sparse: {0}")]
    SetSparseFailure(io::Error),
    #[error("failure with guest memory access: {0}")]
    GuestMemory(cros_async::mem::Error),
    #[error("unsupported operation")]
    UnsupportedOperation,
}
139
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
141
/// Reports the size of a disk image or raw block device.
pub trait DiskGetLen {
    /// Returns the current length of the disk, in bytes.
    fn get_len(&self) -> io::Result<u64>;
}

impl DiskGetLen for File {
    /// Measures the file by seeking to its end, then puts the cursor back where it was.
    fn get_len(&self) -> io::Result<u64> {
        // `Seek` is implemented for `&File`, so a mutable binding of the shared reference is
        // all that is needed to move the cursor.
        let mut file: &File = self;
        let saved_pos = file.stream_position()?;
        let len = file.seek(SeekFrom::End(0))?;
        // Restore the caller-visible position before reporting the length.
        file.seek(SeekFrom::Start(saved_pos)).map(|_| len)
    }
}
157
158 /// The prerequisites necessary to support a block device.
159 pub trait DiskFile:
160 FileSetLen + DiskGetLen + FileReadWriteAtVolatile + ToAsyncDisk + Send + AsRawDescriptors + Debug
161 {
162 /// Creates a new DiskFile instance that shares the same underlying disk file image. IO
163 /// operations to a DiskFile should affect all DiskFile instances with the same underlying disk
164 /// file image.
165 ///
166 /// `try_clone()` returns [`io::ErrorKind::Unsupported`] Error if a DiskFile does not support
167 /// creating an instance with the same underlying disk file image.
try_clone(&self) -> io::Result<Box<dyn DiskFile>>168 fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
169 Err(io::Error::new(
170 io::ErrorKind::Unsupported,
171 "unsupported operation",
172 ))
173 }
174 }
175
/// A `DiskFile` that can be converted for asynchronous access.
pub trait ToAsyncDisk: AsRawDescriptors + DiskGetLen + Send {
    /// Convert a boxed self into a box-wrapped implementation of AsyncDisk.
    /// Used to convert a standard disk image to an async disk image. This conversion and the
    /// inverse are needed so that the `Send` DiskImage can be given to the block thread where it is
    /// converted to a non-`Send` AsyncDisk. The AsyncDisk can then be converted back and returned
    /// to the main device thread if the block device is destroyed or reset.
    fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>;
}
185
186 impl ToAsyncDisk for File {
to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>187 fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>> {
188 Ok(Box::new(SingleFileDisk::new(*self, ex)?))
189 }
190 }
191
/// The variants of image files on the host that can be used as virtual disks.
#[derive(Debug, PartialEq, Eq)]
pub enum ImageType {
    /// No known header magic was found; the file is used as-is.
    Raw,
    /// The file starts with the big-endian encoding of `QCOW_MAGIC`.
    Qcow2,
    /// The file starts with `CDISK_MAGIC`.
    CompositeDisk,
    /// The file starts with the little-endian encoding of `SPARSE_HEADER_MAGIC`.
    AndroidSparse,
}
200
log_host_fs_type(file: &File) -> Result<()>201 fn log_host_fs_type(file: &File) -> Result<()> {
202 let fstype = get_filesystem_type(file).map_err(Error::HostFsType)?;
203 info!("Disk image file is hosted on file system type {:x}", fstype);
204 Ok(())
205 }
206
207 /// Detect the type of an image file by checking for a valid header of the supported formats.
detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType>208 pub fn detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType> {
209 let mut f = file;
210 let disk_size = f.get_len().map_err(Error::SeekingFile)?;
211 let orig_seek = f.stream_position().map_err(Error::SeekingFile)?;
212
213 info!("disk size {}, ", disk_size);
214 log_host_fs_type(f)?;
215 // Try to read the disk in a nicely-aligned block size unless the whole file is smaller.
216 const MAGIC_BLOCK_SIZE: usize = 4096;
217 #[repr(align(4096))]
218 struct BlockAlignedBuffer {
219 data: [u8; MAGIC_BLOCK_SIZE],
220 }
221 let mut magic = BlockAlignedBuffer {
222 data: [0u8; MAGIC_BLOCK_SIZE],
223 };
224 let magic_read_len = if disk_size > MAGIC_BLOCK_SIZE as u64 {
225 MAGIC_BLOCK_SIZE
226 } else {
227 // This cast is safe since we know disk_size is less than MAGIC_BLOCK_SIZE (4096) and
228 // therefore is representable in usize.
229 disk_size as usize
230 };
231
232 read_from_disk(f, 0, &mut magic.data[0..magic_read_len], overlapped_mode)?;
233 f.seek(SeekFrom::Start(orig_seek))
234 .map_err(Error::SeekingFile)?;
235
236 #[cfg(feature = "composite-disk")]
237 if let Some(cdisk_magic) = magic.data.get(0..CDISK_MAGIC.len()) {
238 if cdisk_magic == CDISK_MAGIC.as_bytes() {
239 return Ok(ImageType::CompositeDisk);
240 }
241 }
242
243 #[allow(unused_variables)] // magic4 is only used with the qcow or android-sparse features.
244 if let Some(magic4) = magic.data.get(0..4) {
245 #[cfg(feature = "qcow")]
246 if magic4 == QCOW_MAGIC.to_be_bytes() {
247 return Ok(ImageType::Qcow2);
248 }
249 #[cfg(feature = "android-sparse")]
250 if magic4 == SPARSE_HEADER_MAGIC.to_le_bytes() {
251 return Ok(ImageType::AndroidSparse);
252 }
253 }
254
255 Ok(ImageType::Raw)
256 }
257
258 impl DiskFile for File {
try_clone(&self) -> io::Result<Box<dyn DiskFile>>259 fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
260 Ok(Box::new(self.try_clone()?))
261 }
262 }
263
264 /// Inspect the image file type and create an appropriate disk file to match it.
create_disk_file( raw_image: File, is_sparse_file: bool, max_nesting_depth: u32, image_path: &Path, ) -> Result<Box<dyn DiskFile>>265 pub fn create_disk_file(
266 raw_image: File,
267 is_sparse_file: bool,
268 max_nesting_depth: u32,
269 image_path: &Path,
270 ) -> Result<Box<dyn DiskFile>> {
271 let image_type = detect_image_type(&raw_image, false)?;
272 create_disk_file_of_type(
273 raw_image,
274 is_sparse_file,
275 max_nesting_depth,
276 image_path,
277 image_type,
278 )
279 }
280
281 /// create an appropriate disk file to match give image type.
create_disk_file_of_type( raw_image: File, is_sparse_file: bool, #[allow(unused_variables)] mut max_nesting_depth: u32, #[allow(unused_variables)] image_path: &Path, image_type: ImageType, ) -> Result<Box<dyn DiskFile>>282 pub fn create_disk_file_of_type(
283 raw_image: File,
284 is_sparse_file: bool,
285 // max_nesting_depth is only used if the composite-disk or qcow features are enabled.
286 #[allow(unused_variables)] mut max_nesting_depth: u32,
287 // image_path is only used if the composite-disk feature is enabled.
288 #[allow(unused_variables)] image_path: &Path,
289 image_type: ImageType,
290 ) -> Result<Box<dyn DiskFile>> {
291 if max_nesting_depth == 0 {
292 return Err(Error::MaxNestingDepthExceeded);
293 }
294 #[allow(unused_assignments)]
295 {
296 max_nesting_depth -= 1;
297 }
298
299 Ok(match image_type {
300 ImageType::Raw => {
301 sys::apply_raw_disk_file_options(&raw_image, is_sparse_file)?;
302 Box::new(raw_image) as Box<dyn DiskFile>
303 }
304 #[cfg(feature = "qcow")]
305 ImageType::Qcow2 => {
306 Box::new(QcowFile::from(raw_image, max_nesting_depth).map_err(Error::QcowError)?)
307 as Box<dyn DiskFile>
308 }
309 #[cfg(feature = "composite-disk")]
310 ImageType::CompositeDisk => {
311 // Valid composite disk header present
312 Box::new(
313 CompositeDiskFile::from_file(
314 raw_image,
315 is_sparse_file,
316 max_nesting_depth,
317 image_path,
318 )
319 .map_err(Error::CreateCompositeDisk)?,
320 ) as Box<dyn DiskFile>
321 }
322 #[cfg(feature = "android-sparse")]
323 ImageType::AndroidSparse => {
324 Box::new(AndroidSparse::from_file(raw_image).map_err(Error::CreateAndroidSparseDisk)?)
325 as Box<dyn DiskFile>
326 }
327 #[allow(unreachable_patterns)]
328 _ => return Err(Error::UnknownType),
329 })
330 }
331
332 /// An asynchronously accessible disk.
333 #[async_trait(?Send)]
334 pub trait AsyncDisk: DiskGetLen + FileSetLen + FileAllocate {
335 /// Returns the inner file consuming self.
into_inner(self: Box<Self>) -> Box<dyn DiskFile>336 fn into_inner(self: Box<Self>) -> Box<dyn DiskFile>;
337
338 /// Flush intermediary buffers and/or dirty state to file. fsync not required.
flush(&self) -> Result<()>339 async fn flush(&self) -> Result<()>;
340
341 /// Asynchronously fsyncs any completed operations to the disk.
fsync(&self) -> Result<()>342 async fn fsync(&self) -> Result<()>;
343
344 /// Asynchronously fdatasyncs any completed operations to the disk.
345 /// Note that an implementation may simply call fsync for fdatasync.
fdatasync(&self) -> Result<()>346 async fn fdatasync(&self) -> Result<()>;
347
348 /// Reads from the file at 'file_offset' into memory `mem` at `mem_offsets`.
349 /// `mem_offsets` is similar to an iovec except relative to the start of `mem`.
read_to_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>350 async fn read_to_mem<'a>(
351 &'a self,
352 file_offset: u64,
353 mem: Arc<dyn BackingMemory + Send + Sync>,
354 mem_offsets: cros_async::MemRegionIter<'a>,
355 ) -> Result<usize>;
356
357 /// Writes to the file at 'file_offset' from memory `mem` at `mem_offsets`.
write_from_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>358 async fn write_from_mem<'a>(
359 &'a self,
360 file_offset: u64,
361 mem: Arc<dyn BackingMemory + Send + Sync>,
362 mem_offsets: cros_async::MemRegionIter<'a>,
363 ) -> Result<usize>;
364
365 /// Replaces a range of bytes with a hole.
punch_hole(&self, file_offset: u64, length: u64) -> Result<()>366 async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()>;
367
368 /// Writes up to `length` bytes of zeroes to the stream, returning how many bytes were written.
write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>369 async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>;
370
371 /// Reads from the file at 'file_offset' into `buf`.
372 ///
373 /// Less efficient than `read_to_mem` because of extra copies and allocations.
read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize>374 async fn read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize> {
375 let backing_mem = Arc::new(cros_async::VecIoWrapper::from(vec![0u8; buf.len()]));
376 let region = cros_async::MemRegion {
377 offset: 0,
378 len: buf.len(),
379 };
380 let n = self
381 .read_to_mem(
382 file_offset,
383 backing_mem.clone(),
384 MemRegionIter::new(&[region]),
385 )
386 .await?;
387 backing_mem
388 .get_volatile_slice(region)
389 .expect("BUG: the VecIoWrapper shrank?")
390 .sub_slice(0, n)
391 .expect("BUG: read_to_mem return value too large?")
392 .copy_to(buf);
393 Ok(n)
394 }
395
396 /// Writes to the file at 'file_offset' from `buf`.
397 ///
398 /// Less efficient than `write_from_mem` because of extra copies and allocations.
write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize>399 async fn write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize> {
400 let backing_mem = Arc::new(cros_async::VecIoWrapper::from(buf.to_vec()));
401 let region = cros_async::MemRegion {
402 offset: 0,
403 len: buf.len(),
404 };
405 self.write_from_mem(
406 file_offset,
407 backing_mem,
408 cros_async::MemRegionIter::new(&[region]),
409 )
410 .await
411 }
412 }
413
/// A disk backed by a single file that implements `AsyncDisk` for access.
pub struct SingleFileDisk {
    // The backing file, wrapped in the `cros_async` I/O source used for every operation.
    inner: IoSource<File>,
    // Whether the backing file is a block device; punching a hole in a block device needs a
    // different operation than for a regular file.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    is_block_device_file: bool,
}
421
422 impl DiskGetLen for SingleFileDisk {
get_len(&self) -> io::Result<u64>423 fn get_len(&self) -> io::Result<u64> {
424 self.inner.as_source().get_len()
425 }
426 }
427
428 impl FileSetLen for SingleFileDisk {
set_len(&self, len: u64) -> io::Result<()>429 fn set_len(&self, len: u64) -> io::Result<()> {
430 self.inner.as_source().set_len(len)
431 }
432 }
433
434 impl FileAllocate for SingleFileDisk {
allocate(&mut self, offset: u64, len: u64) -> io::Result<()>435 fn allocate(&mut self, offset: u64, len: u64) -> io::Result<()> {
436 self.inner.as_source_mut().allocate(offset, len)
437 }
438 }
439
440 #[async_trait(?Send)]
441 impl AsyncDisk for SingleFileDisk {
into_inner(self: Box<Self>) -> Box<dyn DiskFile>442 fn into_inner(self: Box<Self>) -> Box<dyn DiskFile> {
443 Box::new(self.inner.into_source())
444 }
445
flush(&self) -> Result<()>446 async fn flush(&self) -> Result<()> {
447 // Nothing to flush, all file mutations are immediately sent to the OS.
448 Ok(())
449 }
450
fsync(&self) -> Result<()>451 async fn fsync(&self) -> Result<()> {
452 self.inner.fsync().await.map_err(Error::Fsync)
453 }
454
fdatasync(&self) -> Result<()>455 async fn fdatasync(&self) -> Result<()> {
456 self.inner.fdatasync().await.map_err(Error::Fdatasync)
457 }
458
read_to_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>459 async fn read_to_mem<'a>(
460 &'a self,
461 file_offset: u64,
462 mem: Arc<dyn BackingMemory + Send + Sync>,
463 mem_offsets: cros_async::MemRegionIter<'a>,
464 ) -> Result<usize> {
465 self.inner
466 .read_to_mem(Some(file_offset), mem, mem_offsets)
467 .await
468 .map_err(Error::ReadToMem)
469 }
470
write_from_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>471 async fn write_from_mem<'a>(
472 &'a self,
473 file_offset: u64,
474 mem: Arc<dyn BackingMemory + Send + Sync>,
475 mem_offsets: cros_async::MemRegionIter<'a>,
476 ) -> Result<usize> {
477 self.inner
478 .write_from_mem(Some(file_offset), mem, mem_offsets)
479 .await
480 .map_err(Error::WriteFromMem)
481 }
482
punch_hole(&self, file_offset: u64, length: u64) -> Result<()>483 async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()> {
484 #[cfg(any(target_os = "android", target_os = "linux"))]
485 if self.is_block_device_file {
486 return base::linux::discard_block(self.inner.as_source(), file_offset, length)
487 .map_err(Error::PunchHoleBlockDeviceFile);
488 }
489 self.inner
490 .punch_hole(file_offset, length)
491 .await
492 .map_err(Error::PunchHole)
493 }
494
write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>495 async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()> {
496 if self
497 .inner
498 .write_zeroes_at(file_offset, length)
499 .await
500 .is_ok()
501 {
502 return Ok(());
503 }
504
505 // Fall back to filling zeros if more efficient write_zeroes_at doesn't work.
506 let buf_size = min(length, 0x10000);
507 let mut nwritten = 0;
508 while nwritten < length {
509 let remaining = length - nwritten;
510 let write_size = min(remaining, buf_size) as usize;
511 let buf = vec![0u8; write_size];
512 nwritten += self
513 .inner
514 .write_from_vec(Some(file_offset + nwritten), buf)
515 .await
516 .map(|(n, _)| n as u64)
517 .map_err(Error::WriteFromVec)?;
518 }
519 Ok(())
520 }
521 }
522