1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 mod qcow_raw_file;
6 mod refcount;
7 mod vec_cache;
8 
9 use std::cmp::max;
10 use std::cmp::min;
11 use std::fs::File;
12 use std::fs::OpenOptions;
13 use std::io;
14 use std::io::Read;
15 use std::io::Seek;
16 use std::io::SeekFrom;
17 use std::io::Write;
18 use std::mem::size_of;
19 use std::path::Path;
20 use std::str;
21 
22 use base::error;
23 use base::open_file_or_duplicate;
24 use base::AsRawDescriptor;
25 use base::AsRawDescriptors;
26 use base::FileAllocate;
27 use base::FileReadWriteAtVolatile;
28 use base::FileSetLen;
29 use base::FileSync;
30 use base::PunchHoleMut;
31 use base::RawDescriptor;
32 use base::VolatileMemory;
33 use base::VolatileSlice;
34 use base::WriteZeroesAt;
35 use cros_async::Executor;
36 use libc::EINVAL;
37 use libc::ENOSPC;
38 use libc::ENOTSUP;
39 use remain::sorted;
40 use thiserror::Error;
41 
42 use crate::asynchronous::DiskFlush;
43 use crate::create_disk_file;
44 use crate::qcow::qcow_raw_file::QcowRawFile;
45 use crate::qcow::refcount::RefCount;
46 use crate::qcow::vec_cache::CacheMap;
47 use crate::qcow::vec_cache::Cacheable;
48 use crate::qcow::vec_cache::VecCache;
49 use crate::AsyncDisk;
50 use crate::AsyncDiskFileWrapper;
51 use crate::DiskFile;
52 use crate::DiskGetLen;
53 use crate::ToAsyncDisk;
54 
55 #[sorted]
56 #[derive(Error, Debug)]
57 pub enum Error {
58     #[error("backing file io error: {0}")]
59     BackingFileIo(io::Error),
60     #[error("backing file open error: {0}")]
61     BackingFileOpen(Box<crate::Error>),
62     #[error("backing file name is too long: {0} bytes over")]
63     BackingFileTooLong(usize),
64     #[error("compressed blocks not supported")]
65     CompressedBlocksNotSupported,
66     #[error("failed to evict cache: {0}")]
67     EvictingCache(io::Error),
68     #[error("file larger than max of {}: {0}", MAX_QCOW_FILE_SIZE)]
69     FileTooBig(u64),
70     #[error("failed to get file size: {0}")]
71     GettingFileSize(io::Error),
72     #[error("failed to get refcount: {0}")]
73     GettingRefcount(refcount::Error),
74     #[error("failed to parse filename: {0}")]
75     InvalidBackingFileName(str::Utf8Error),
76     #[error("invalid cluster index")]
77     InvalidClusterIndex,
78     #[error("invalid cluster size")]
79     InvalidClusterSize,
80     #[error("invalid index")]
81     InvalidIndex,
82     #[error("invalid L1 table offset")]
83     InvalidL1TableOffset,
84     #[error("invalid L1 table size {0}")]
85     InvalidL1TableSize(u32),
86     #[error("invalid magic")]
87     InvalidMagic,
88     #[error("invalid offset")]
89     InvalidOffset(u64),
90     #[error("invalid refcount table offset")]
91     InvalidRefcountTableOffset,
92     #[error("invalid refcount table size: {0}")]
93     InvalidRefcountTableSize(u64),
94     #[error("no free clusters")]
95     NoFreeClusters,
96     #[error("no refcount clusters")]
97     NoRefcountClusters,
98     #[error("not enough space for refcounts")]
99     NotEnoughSpaceForRefcounts,
100     #[error("failed to open file: {0}")]
101     OpeningFile(io::Error),
102     #[error("failed to open file: {0}")]
103     ReadingHeader(io::Error),
104     #[error("failed to read pointers: {0}")]
105     ReadingPointers(io::Error),
106     #[error("failed to read ref count block: {0}")]
107     ReadingRefCountBlock(refcount::Error),
108     #[error("failed to read ref counts: {0}")]
109     ReadingRefCounts(io::Error),
110     #[error("failed to rebuild ref counts: {0}")]
111     RebuildingRefCounts(io::Error),
112     #[error("refcount table offset past file end")]
113     RefcountTableOffEnd,
114     #[error("too many clusters specified for refcount table")]
115     RefcountTableTooLarge,
116     #[error("failed to seek file: {0}")]
117     SeekingFile(io::Error),
118     #[error("failed to set refcount refcount: {0}")]
119     SettingRefcountRefcount(io::Error),
120     #[error("size too small for number of clusters")]
121     SizeTooSmallForNumberOfClusters,
122     #[error("l1 entry table too large: {0}")]
123     TooManyL1Entries(u64),
124     #[error("ref count table too large: {0}")]
125     TooManyRefcounts(u64),
126     #[error("unsupported refcount order")]
127     UnsupportedRefcountOrder,
128     #[error("unsupported version: {0}")]
129     UnsupportedVersion(u32),
130     #[error("failed to write header: {0}")]
131     WritingHeader(io::Error),
132 }
133 
134 pub type Result<T> = std::result::Result<T, Error>;
135 
136 // Maximum data size supported.
137 const MAX_QCOW_FILE_SIZE: u64 = 0x01 << 44; // 16 TB.
138 
139 // QCOW magic constant that starts the header.
140 pub const QCOW_MAGIC: u32 = 0x5146_49fb;
141 // Default to a cluster size of 2^DEFAULT_CLUSTER_BITS
142 const DEFAULT_CLUSTER_BITS: u32 = 16;
143 // Limit clusters to reasonable sizes. Choose the same limits as qemu. Making the clusters smaller
144 // increases the amount of overhead for bookkeeping.
145 const MIN_CLUSTER_BITS: u32 = 9;
146 const MAX_CLUSTER_BITS: u32 = 21;
147 // The L1 and RefCount tables are kept in RAM; only handle files that require less than 35M
148 // entries. This easily covers 1 TB files. When support for bigger files is needed, the
149 // assumptions made to keep these tables in RAM need to be thrown out.
150 const MAX_RAM_POINTER_TABLE_SIZE: u64 = 35_000_000;
151 // Only support 2 byte refcounts, 2^refcount_order bits.
152 const DEFAULT_REFCOUNT_ORDER: u32 = 4;
153 
154 const V3_BARE_HEADER_SIZE: u32 = 104;
155 
156 // bits 0-8 and 56-63 are reserved.
157 const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
158 const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
159 // Flags
160 const COMPRESSED_FLAG: u64 = 1 << 62;
161 const CLUSTER_USED_FLAG: u64 = 1 << 63;
162 const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0;
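// In an L2 table entry, COMPRESSED_FLAG (bit 62) marks a compressed cluster, and CLUSTER_USED_FLAG
// (bit 63, the qcow2 "copied" flag) marks a cluster whose refcount is exactly one, so it can be
// written in place without copy-on-write. COMPATIBLE_FEATURES_LAZY_REFCOUNTS is bit 0 of the
// header's compatible_features field.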
163 
164 // The format supports a "header extension area" that crosvm does not use.
165 const QCOW_EMPTY_HEADER_EXTENSION_SIZE: u32 = 8;
166 
167 // Defined by the specification
168 const MAX_BACKING_FILE_SIZE: u32 = 1023;
169 
170 /// Contains the information from the header of a qcow file.
171 #[derive(Clone, Debug)]
172 pub struct QcowHeader {
173     pub magic: u32,
174     pub version: u32,
175 
176     pub backing_file_offset: u64,
177     pub backing_file_size: u32,
178 
179     pub cluster_bits: u32,
180     pub size: u64,
181     pub crypt_method: u32,
182 
183     pub l1_size: u32,
184     pub l1_table_offset: u64,
185 
186     pub refcount_table_offset: u64,
187     pub refcount_table_clusters: u32,
188 
189     pub nb_snapshots: u32,
190     pub snapshots_offset: u64,
191 
192     // v3 entries
193     pub incompatible_features: u64,
194     pub compatible_features: u64,
195     pub autoclear_features: u64,
196     pub refcount_order: u32,
197     pub header_size: u32,
198 
199     // Post-header entries
200     pub backing_file_path: Option<String>,
201 }
202 
203 // Reads the next u16 from the file.
204 fn read_u16_from_file(mut f: &File) -> Result<u16> {
205     let mut value = [0u8; 2];
206     (&mut f)
207         .read_exact(&mut value)
208         .map_err(Error::ReadingHeader)?;
209     Ok(u16::from_be_bytes(value))
210 }
211 
212 // Reads the next u32 from the file.
213 fn read_u32_from_file(mut f: &File) -> Result<u32> {
214     let mut value = [0u8; 4];
215     (&mut f)
216         .read_exact(&mut value)
217         .map_err(Error::ReadingHeader)?;
218     Ok(u32::from_be_bytes(value))
219 }
220 
221 // Reads the next u64 from the file.
222 fn read_u64_from_file(mut f: &File) -> Result<u64> {
223     let mut value = [0u8; 8];
224     (&mut f)
225         .read_exact(&mut value)
226         .map_err(Error::ReadingHeader)?;
227     Ok(u64::from_be_bytes(value))
228 }
229 
230 impl QcowHeader {
231     /// Creates a QcowHeader from a reference to a file.
232     pub fn new(f: &mut File) -> Result<QcowHeader> {
233         f.seek(SeekFrom::Start(0)).map_err(Error::ReadingHeader)?;
234 
235         let magic = read_u32_from_file(f)?;
236         if magic != QCOW_MAGIC {
237             return Err(Error::InvalidMagic);
238         }
239 
240         let mut header = QcowHeader {
241             magic,
242             version: read_u32_from_file(f)?,
243             backing_file_offset: read_u64_from_file(f)?,
244             backing_file_size: read_u32_from_file(f)?,
245             cluster_bits: read_u32_from_file(f)?,
246             size: read_u64_from_file(f)?,
247             crypt_method: read_u32_from_file(f)?,
248             l1_size: read_u32_from_file(f)?,
249             l1_table_offset: read_u64_from_file(f)?,
250             refcount_table_offset: read_u64_from_file(f)?,
251             refcount_table_clusters: read_u32_from_file(f)?,
252             nb_snapshots: read_u32_from_file(f)?,
253             snapshots_offset: read_u64_from_file(f)?,
254             incompatible_features: read_u64_from_file(f)?,
255             compatible_features: read_u64_from_file(f)?,
256             autoclear_features: read_u64_from_file(f)?,
257             refcount_order: read_u32_from_file(f)?,
258             header_size: read_u32_from_file(f)?,
259             backing_file_path: None,
260         };
261         if header.backing_file_size > MAX_BACKING_FILE_SIZE {
262             return Err(Error::BackingFileTooLong(header.backing_file_size as usize));
263         }
264         if header.backing_file_offset != 0 {
265             f.seek(SeekFrom::Start(header.backing_file_offset))
266                 .map_err(Error::ReadingHeader)?;
267             let mut backing_file_name_bytes = vec![0u8; header.backing_file_size as usize];
268             f.read_exact(&mut backing_file_name_bytes)
269                 .map_err(Error::ReadingHeader)?;
270             header.backing_file_path = Some(
271                 String::from_utf8(backing_file_name_bytes)
272                     .map_err(|err| Error::InvalidBackingFileName(err.utf8_error()))?,
273             );
274         }
275         Ok(header)
276     }
277 
278     pub fn create_for_size_and_path(size: u64, backing_file: Option<&str>) -> Result<QcowHeader> {
279         let cluster_bits: u32 = DEFAULT_CLUSTER_BITS;
280         let cluster_size: u32 = 0x01 << cluster_bits;
281         let max_length: usize =
282             (cluster_size - V3_BARE_HEADER_SIZE - QCOW_EMPTY_HEADER_EXTENSION_SIZE) as usize;
283         if let Some(path) = backing_file {
284             if path.len() > max_length {
285                 return Err(Error::BackingFileTooLong(path.len() - max_length));
286             }
287         }
288         // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses.
289         let l2_size: u32 = cluster_size / size_of::<u64>() as u32;
290         let num_clusters: u32 = size.div_ceil(u64::from(cluster_size)) as u32;
291         let num_l2_clusters: u32 = num_clusters.div_ceil(l2_size);
292         let l1_clusters: u32 = num_l2_clusters.div_ceil(cluster_size);
293         let header_clusters = (size_of::<QcowHeader>() as u32).div_ceil(cluster_size);
294         Ok(QcowHeader {
295             magic: QCOW_MAGIC,
296             version: 3,
297             backing_file_offset: (if backing_file.is_none() {
298                 0
299             } else {
300                 V3_BARE_HEADER_SIZE + QCOW_EMPTY_HEADER_EXTENSION_SIZE
301             }) as u64,
302             backing_file_size: backing_file.map_or(0, |x| x.len()) as u32,
303             cluster_bits: DEFAULT_CLUSTER_BITS,
304             size,
305             crypt_method: 0,
306             l1_size: num_l2_clusters,
307             l1_table_offset: u64::from(cluster_size),
308             // The refcount table is after l1 + header.
309             refcount_table_offset: u64::from(cluster_size * (l1_clusters + 1)),
310             refcount_table_clusters: {
311                 // Pre-allocate enough clusters for the entire refcount table as it must be
312                 // contiguous in the file. Allocate enough space to refcount all clusters, including
313                 // the refcount clusters.
314                 let max_refcount_clusters = max_refcount_clusters(
315                     DEFAULT_REFCOUNT_ORDER,
316                     cluster_size,
317                     num_clusters + l1_clusters + num_l2_clusters + header_clusters,
318                 ) as u32;
319                 // The refcount table needs to store the offset of each refcount cluster.
320                 (max_refcount_clusters * size_of::<u64>() as u32).div_ceil(cluster_size)
321             },
322             nb_snapshots: 0,
323             snapshots_offset: 0,
324             incompatible_features: 0,
325             compatible_features: 0,
326             autoclear_features: 0,
327             refcount_order: DEFAULT_REFCOUNT_ORDER,
328             header_size: V3_BARE_HEADER_SIZE,
329             backing_file_path: backing_file.map(String::from),
330         })
331     }
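    // For example, with the defaults above (64 KiB clusters), a 1 GiB image has 16384 data
    // clusters and 8192 L2 entries per cluster, so num_l2_clusters = 2 and l1_clusters = 1:
    // the L1 table starts at offset 0x10000 and the refcount table at offset 0x20000.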
332 
333     /// Write the header to `file`.
334     pub fn write_to<F: Write + Seek>(&self, file: &mut F) -> Result<()> {
335         // Writes the next u32 to the file.
336         fn write_u32_to_file<F: Write>(f: &mut F, value: u32) -> Result<()> {
337             f.write_all(&value.to_be_bytes())
338                 .map_err(Error::WritingHeader)
339         }
340 
341         // Writes the next u64 to the file.
342         fn write_u64_to_file<F: Write>(f: &mut F, value: u64) -> Result<()> {
343             f.write_all(&value.to_be_bytes())
344                 .map_err(Error::WritingHeader)
345         }
346 
347         write_u32_to_file(file, self.magic)?;
348         write_u32_to_file(file, self.version)?;
349         write_u64_to_file(file, self.backing_file_offset)?;
350         write_u32_to_file(file, self.backing_file_size)?;
351         write_u32_to_file(file, self.cluster_bits)?;
352         write_u64_to_file(file, self.size)?;
353         write_u32_to_file(file, self.crypt_method)?;
354         write_u32_to_file(file, self.l1_size)?;
355         write_u64_to_file(file, self.l1_table_offset)?;
356         write_u64_to_file(file, self.refcount_table_offset)?;
357         write_u32_to_file(file, self.refcount_table_clusters)?;
358         write_u32_to_file(file, self.nb_snapshots)?;
359         write_u64_to_file(file, self.snapshots_offset)?;
360         write_u64_to_file(file, self.incompatible_features)?;
361         write_u64_to_file(file, self.compatible_features)?;
362         write_u64_to_file(file, self.autoclear_features)?;
363         write_u32_to_file(file, self.refcount_order)?;
364         write_u32_to_file(file, self.header_size)?;
365         write_u32_to_file(file, 0)?; // header extension type: end of header extension area
366         write_u32_to_file(file, 0)?; // length of header extension data: 0
367         if let Some(backing_file_path) = self.backing_file_path.as_ref() {
368             write!(file, "{}", backing_file_path).map_err(Error::WritingHeader)?;
369         }
370 
371         // Set the file length by seeking and writing a zero to the last byte. This avoids
372         // requiring the `file` argument to be a `File` rather than anything else that
373         // implements Write + Seek. It also leaves the L1 and refcount table clusters zeroed.
374         let cluster_size = 0x01u64 << self.cluster_bits;
375         let refcount_blocks_size = u64::from(self.refcount_table_clusters) * cluster_size;
376         file.seek(SeekFrom::Start(
377             self.refcount_table_offset + refcount_blocks_size - 2,
378         ))
379         .map_err(Error::WritingHeader)?;
380         file.write(&[0u8]).map_err(Error::WritingHeader)?;
381 
382         Ok(())
383     }
384 }
385 
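// For example, with the default refcount_order of 4 (2-byte refcounts) and 64 KiB clusters,
// refcounting 262144 data clusters (a 16 GiB image) needs 8 refcount clusters plus 1 more to
// refcount the refcount clusters themselves, so max_refcount_clusters returns 9.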
386 fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> u64 {
387     // Use u64 as the product of the u32 inputs can overflow.
388     let refcount_bytes = (0x01 << refcount_order as u64) / 8;
389     let for_data = (u64::from(num_clusters) * refcount_bytes).div_ceil(u64::from(cluster_size));
390     let for_refcounts = (for_data * refcount_bytes).div_ceil(u64::from(cluster_size));
391     for_data + for_refcounts
392 }
393 
394 /// Represents a qcow2 file. This is a sparse file format maintained by the qemu project.
395 /// Full documentation of the format can be found in the qemu repository.
396 ///
397 /// # Example
398 ///
399 /// ```
400 /// # use base::FileReadWriteAtVolatile;
401 /// # use disk::QcowFile;
402 /// # use base::VolatileSlice;
403 /// # fn test(file: std::fs::File) -> std::io::Result<()> {
404 ///     let mut q = QcowFile::from(file, disk::MAX_NESTING_DEPTH).expect("Can't open qcow file");
405 ///     let mut buf = [0u8; 12];
406 ///     let mut vslice = VolatileSlice::new(&mut buf);
407 ///     q.read_at_volatile(vslice, 10)?;
408 /// #   Ok(())
409 /// # }
410 /// ```
411 #[derive(Debug)]
412 pub struct QcowFile {
413     raw_file: QcowRawFile,
414     header: QcowHeader,
415     l1_table: VecCache<u64>,
416     l2_entries: u64,
417     l2_cache: CacheMap<VecCache<u64>>,
418     refcounts: RefCount,
419     current_offset: u64,
420     unref_clusters: Vec<u64>, // List of freshly unreferenced clusters.
421     // List of unreferenced clusters available to be used. Unreferenced clusters become
422     // available once the removal of references to them has been synced to disk.
423     avail_clusters: Vec<u64>,
424     backing_file: Option<Box<dyn DiskFile>>,
425 }
426 
427 impl DiskFile for QcowFile {}
428 
429 impl DiskFlush for QcowFile {
430     fn flush(&mut self) -> io::Result<()> {
431         // Using fsync is overkill here, but the code for flushing state to the file is
432         // tangled up with the fsync, so it is the best we can do for now.
433         self.fsync()
434     }
435 }
436 
437 impl QcowFile {
438     /// Creates a QcowFile from `file`. File must be a valid qcow2 image.
439     pub fn from(mut file: File, max_nesting_depth: u32) -> Result<QcowFile> {
440         let header = QcowHeader::new(&mut file)?;
441 
442         // Only v3 files are supported.
443         if header.version != 3 {
444             return Err(Error::UnsupportedVersion(header.version));
445         }
446 
447         // Make sure that the L1 table fits in RAM.
448         if u64::from(header.l1_size) > MAX_RAM_POINTER_TABLE_SIZE {
449             return Err(Error::InvalidL1TableSize(header.l1_size));
450         }
451 
452         let cluster_bits: u32 = header.cluster_bits;
453         if !(MIN_CLUSTER_BITS..=MAX_CLUSTER_BITS).contains(&cluster_bits) {
454             return Err(Error::InvalidClusterSize);
455         }
456         let cluster_size = 0x01u64 << cluster_bits;
457 
458         // Limit the total size of the disk.
459         if header.size > MAX_QCOW_FILE_SIZE {
460             return Err(Error::FileTooBig(header.size));
461         }
462 
463         let backing_file = if let Some(backing_file_path) = header.backing_file_path.as_ref() {
464             let path = backing_file_path.clone();
465             let backing_raw_file = open_file_or_duplicate(
466                 Path::new(&path),
467                 OpenOptions::new().read(true), // TODO(b/190435784): Add support for O_DIRECT.
468             )
469             .map_err(|e| Error::BackingFileIo(e.into()))?;
470             // is_sparse_file is false because qcow is internally sparse and we don't need file
471             // system sparseness on top of that.
472             let backing_file = create_disk_file(
473                 backing_raw_file,
474                 /* is_sparse_file= */ false,
475                 max_nesting_depth,
476                 Path::new(&path),
477             )
478             .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
479             Some(backing_file)
480         } else {
481             None
482         };
483 
484         // Only support two byte refcounts.
485         let refcount_bits: u64 = 0x01u64
486             .checked_shl(header.refcount_order)
487             .ok_or(Error::UnsupportedRefcountOrder)?;
488         if refcount_bits != 16 {
489             return Err(Error::UnsupportedRefcountOrder);
490         }
491         let refcount_bytes = (refcount_bits + 7) / 8;
492 
493         // Need at least one refcount cluster
494         if header.refcount_table_clusters == 0 {
495             return Err(Error::NoRefcountClusters);
496         }
497         offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?;
498         offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?;
499         // The refcount table must start at a cluster boundary and lie within the file's virtual or actual size.
500         offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?;
501         let file_size = file.metadata().map_err(Error::GettingFileSize)?.len();
502         if header.refcount_table_offset > max(file_size, header.size) {
503             return Err(Error::RefcountTableOffEnd);
504         }
505 
506         // The first cluster should always have a non-zero refcount, so if it is 0,
507         // this is an old file with broken refcounts, which requires a rebuild.
508         let mut refcount_rebuild_required = true;
509         file.seek(SeekFrom::Start(header.refcount_table_offset))
510             .map_err(Error::SeekingFile)?;
511         let first_refblock_addr = read_u64_from_file(&file)?;
512         if first_refblock_addr != 0 {
513             file.seek(SeekFrom::Start(first_refblock_addr))
514                 .map_err(Error::SeekingFile)?;
515             let first_cluster_refcount = read_u16_from_file(&file)?;
516             if first_cluster_refcount != 0 {
517                 refcount_rebuild_required = false;
518             }
519         }
520 
521         if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 {
522             refcount_rebuild_required = true;
523         }
524 
525         let mut raw_file =
526             QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?;
527         if refcount_rebuild_required {
528             QcowFile::rebuild_refcounts(&mut raw_file, header.clone())?;
529         }
530 
531         let l2_size = cluster_size / size_of::<u64>() as u64;
532         let num_clusters = header.size.div_ceil(cluster_size);
533         let num_l2_clusters = num_clusters.div_ceil(l2_size);
534         let l1_clusters = num_l2_clusters.div_ceil(cluster_size);
535         let header_clusters = (size_of::<QcowHeader>() as u64).div_ceil(cluster_size);
536         if num_l2_clusters > MAX_RAM_POINTER_TABLE_SIZE {
537             return Err(Error::TooManyL1Entries(num_l2_clusters));
538         }
539         let l1_table = VecCache::from_vec(
540             raw_file
541                 .read_pointer_table(
542                     header.l1_table_offset,
543                     num_l2_clusters,
544                     Some(L1_TABLE_OFFSET_MASK),
545                 )
546                 .map_err(Error::ReadingHeader)?,
547         );
548 
549         let num_clusters = header.size.div_ceil(cluster_size);
550         let refcount_clusters = max_refcount_clusters(
551             header.refcount_order,
552             cluster_size as u32,
553             (num_clusters + l1_clusters + num_l2_clusters + header_clusters) as u32,
554         );
555         // Check that the given header doesn't have a suspiciously sized refcount table.
556         if u64::from(header.refcount_table_clusters) > 2 * refcount_clusters {
557             return Err(Error::RefcountTableTooLarge);
558         }
559         if l1_clusters + refcount_clusters > MAX_RAM_POINTER_TABLE_SIZE {
560             return Err(Error::TooManyRefcounts(refcount_clusters));
561         }
562         let refcount_block_entries = cluster_size / refcount_bytes;
563         let refcounts = RefCount::new(
564             &mut raw_file,
565             header.refcount_table_offset,
566             refcount_clusters,
567             refcount_block_entries,
568             cluster_size,
569         )
570         .map_err(Error::ReadingRefCounts)?;
571 
572         let l2_entries = cluster_size / size_of::<u64>() as u64;
573 
574         let mut qcow = QcowFile {
575             raw_file,
576             header,
577             l1_table,
578             l2_entries,
579             l2_cache: CacheMap::new(100),
580             refcounts,
581             current_offset: 0,
582             unref_clusters: Vec::new(),
583             avail_clusters: Vec::new(),
584             backing_file,
585         };
586 
587         // Check that the L1 and refcount tables fit in a 64bit address space.
588         qcow.header
589             .l1_table_offset
590             .checked_add(qcow.l1_address_offset(qcow.virtual_size()))
591             .ok_or(Error::InvalidL1TableOffset)?;
592         qcow.header
593             .refcount_table_offset
594             .checked_add(u64::from(qcow.header.refcount_table_clusters) * cluster_size)
595             .ok_or(Error::InvalidRefcountTableOffset)?;
596 
597         qcow.find_avail_clusters()?;
598 
599         Ok(qcow)
600     }
601 
602     /// Creates a new QcowFile of the given virtual size, stored in `file`.
603     pub fn new(file: File, virtual_size: u64) -> Result<QcowFile> {
604         let header = QcowHeader::create_for_size_and_path(virtual_size, None)?;
605         QcowFile::new_from_header(file, header, 1)
606     }
607 
608     /// Creates a new QcowFile in `file` backed by the image at `backing_file_name`.
609     pub fn new_from_backing(
610         file: File,
611         backing_file_name: &str,
612         backing_file_max_nesting_depth: u32,
613     ) -> Result<QcowFile> {
614         let backing_path = Path::new(backing_file_name);
615         let backing_raw_file = open_file_or_duplicate(
616             backing_path,
617             OpenOptions::new().read(true), // TODO(b/190435784): add support for O_DIRECT.
618         )
619         .map_err(|e| Error::BackingFileIo(e.into()))?;
620         // is_sparse_file is false because qcow is internally sparse and we don't need file
621         // system sparseness on top of that.
622         let backing_file = create_disk_file(
623             backing_raw_file,
624             /* is_sparse_file= */ false,
625             backing_file_max_nesting_depth,
626             backing_path,
627         )
628         .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
629         let size = backing_file.get_len().map_err(Error::BackingFileIo)?;
630         let header = QcowHeader::create_for_size_and_path(size, Some(backing_file_name))?;
631         let mut result = QcowFile::new_from_header(file, header, backing_file_max_nesting_depth)?;
632         result.backing_file = Some(backing_file);
633         Ok(result)
634     }
635 
636     fn new_from_header(
637         mut file: File,
638         header: QcowHeader,
639         max_nesting_depth: u32,
640     ) -> Result<QcowFile> {
641         file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?;
642         header.write_to(&mut file)?;
643 
644         let mut qcow = Self::from(file, max_nesting_depth)?;
645 
646         // Set the refcount to 1 for every cluster up to the end of the refcount table (header, L1, and refcount table clusters).
647         let cluster_size = 0x01u64 << qcow.header.cluster_bits;
648         let refcount_table_base = qcow.header.refcount_table_offset;
649         let end_cluster_addr =
650             refcount_table_base + u64::from(qcow.header.refcount_table_clusters) * cluster_size;
651 
652         let mut cluster_addr = 0;
653         while cluster_addr < end_cluster_addr {
654             let mut unref_clusters = qcow
655                 .set_cluster_refcount(cluster_addr, 1)
656                 .map_err(Error::SettingRefcountRefcount)?;
657             qcow.unref_clusters.append(&mut unref_clusters);
658             cluster_addr += cluster_size;
659         }
660 
661         Ok(qcow)
662     }
663 
664     pub fn set_backing_file(&mut self, backing: Option<Box<dyn DiskFile>>) {
665         self.backing_file = backing;
666     }
667 
668     /// Returns the first cluster in the file with a 0 refcount. Used for testing.
669     pub fn first_zero_refcount(&mut self) -> Result<Option<u64>> {
670         let file_size = self
671             .raw_file
672             .file_mut()
673             .metadata()
674             .map_err(Error::GettingFileSize)?
675             .len();
676         let cluster_size = 0x01u64 << self.header.cluster_bits;
677 
678         let mut cluster_addr = 0;
679         while cluster_addr < file_size {
680             let cluster_refcount = self
681                 .refcounts
682                 .get_cluster_refcount(&mut self.raw_file, cluster_addr)
683                 .map_err(Error::GettingRefcount)?;
684             if cluster_refcount == 0 {
685                 return Ok(Some(cluster_addr));
686             }
687             cluster_addr += cluster_size;
688         }
689         Ok(None)
690     }
691 
692     fn find_avail_clusters(&mut self) -> Result<()> {
693         let cluster_size = self.raw_file.cluster_size();
694 
695         let file_size = self
696             .raw_file
697             .file_mut()
698             .metadata()
699             .map_err(Error::GettingFileSize)?
700             .len();
701 
702         for i in (0..file_size).step_by(cluster_size as usize) {
703             let refcount = self
704                 .refcounts
705                 .get_cluster_refcount(&mut self.raw_file, i)
706                 .map_err(Error::GettingRefcount)?;
707             if refcount == 0 {
708                 self.avail_clusters.push(i);
709             }
710         }
711 
712         Ok(())
713     }
714 
715     /// Rebuild the reference count tables.
716     fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> {
717         fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> {
718             let idx = (cluster_address / cluster_size) as usize;
719             if idx >= refcounts.len() {
720                 return Err(Error::InvalidClusterIndex);
721             }
722             refcounts[idx] += 1;
723             Ok(())
724         }
725 
726         // Add a reference to the first cluster (header plus extensions).
727         fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> {
728             add_ref(refcounts, cluster_size, 0)
729         }
730 
731         // Add references to the L1 table clusters.
732         fn set_l1_refcounts(
733             refcounts: &mut [u16],
734             header: QcowHeader,
735             cluster_size: u64,
736         ) -> Result<()> {
737             let l1_clusters = u64::from(header.l1_size).div_ceil(cluster_size);
738             let l1_table_offset = header.l1_table_offset;
739             for i in 0..l1_clusters {
740                 add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?;
741             }
742             Ok(())
743         }
744 
745         // Traverse the L1 and L2 tables to find all reachable data clusters.
746         fn set_data_refcounts(
747             refcounts: &mut [u16],
748             header: QcowHeader,
749             cluster_size: u64,
750             raw_file: &mut QcowRawFile,
751         ) -> Result<()> {
752             let l1_table = raw_file
753                 .read_pointer_table(
754                     header.l1_table_offset,
755                     header.l1_size as u64,
756                     Some(L1_TABLE_OFFSET_MASK),
757                 )
758                 .map_err(Error::ReadingPointers)?;
759             for l1_index in 0..header.l1_size as usize {
760                 let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
761                 if l2_addr_disk != 0 {
762                     // Add a reference to the L2 table cluster itself.
763                     add_ref(refcounts, cluster_size, l2_addr_disk)?;
764 
765                     // Read the L2 table and find all referenced data clusters.
766                     let l2_table = raw_file
767                         .read_pointer_table(
768                             l2_addr_disk,
769                             cluster_size / size_of::<u64>() as u64,
770                             Some(L2_TABLE_OFFSET_MASK),
771                         )
772                         .map_err(Error::ReadingPointers)?;
773                     for data_cluster_addr in l2_table {
774                         if data_cluster_addr != 0 {
775                             add_ref(refcounts, cluster_size, data_cluster_addr)?;
776                         }
777                     }
778                 }
779             }
780 
781             Ok(())
782         }
783 
784         // Add references to the top-level refcount table clusters.
785         fn set_refcount_table_refcounts(
786             refcounts: &mut [u16],
787             header: QcowHeader,
788             cluster_size: u64,
789         ) -> Result<()> {
790             let refcount_table_offset = header.refcount_table_offset;
791             for i in 0..header.refcount_table_clusters as u64 {
792                 add_ref(
793                     refcounts,
794                     cluster_size,
795                     refcount_table_offset + i * cluster_size,
796                 )?;
797             }
798             Ok(())
799         }
800 
801         // Allocate clusters for refblocks.
802         // This needs to be done last so that we have the correct refcounts for all other
803         // clusters.
804         fn alloc_refblocks(
805             refcounts: &mut [u16],
806             cluster_size: u64,
807             refblock_clusters: u64,
808             pointers_per_cluster: u64,
809         ) -> Result<Vec<u64>> {
810             let refcount_table_entries = refblock_clusters.div_ceil(pointers_per_cluster);
811             let mut ref_table = vec![0; refcount_table_entries as usize];
812             let mut first_free_cluster: u64 = 0;
813             for refblock_addr in &mut ref_table {
814                 loop {
815                     if first_free_cluster >= refcounts.len() as u64 {
816                         return Err(Error::NotEnoughSpaceForRefcounts);
817                     }
818                     if refcounts[first_free_cluster as usize] == 0 {
819                         break;
820                     }
821                     first_free_cluster += 1;
822                 }
823 
824                 *refblock_addr = first_free_cluster * cluster_size;
825                 add_ref(refcounts, cluster_size, *refblock_addr)?;
826 
827                 first_free_cluster += 1;
828             }
829 
830             Ok(ref_table)
831         }
832 
833         // Write the updated reference count blocks and reftable.
834         fn write_refblocks(
835             refcounts: &[u16],
836             mut header: QcowHeader,
837             ref_table: &[u64],
838             raw_file: &mut QcowRawFile,
839             refcount_block_entries: u64,
840         ) -> Result<()> {
841             // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables.
842             header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
843             raw_file
844                 .file_mut()
845                 .seek(SeekFrom::Start(0))
846                 .map_err(Error::SeekingFile)?;
847             header.write_to(raw_file.file_mut())?;
848 
849             for (i, refblock_addr) in ref_table.iter().enumerate() {
850                 // Write a block of refcounts to the location indicated by refblock_addr.
851                 let refblock_start = i * (refcount_block_entries as usize);
852                 let refblock_end = min(
853                     refcounts.len(),
854                     refblock_start + refcount_block_entries as usize,
855                 );
856                 let refblock = &refcounts[refblock_start..refblock_end];
857                 raw_file
858                     .write_refcount_block(*refblock_addr, refblock)
859                     .map_err(Error::WritingHeader)?;
860 
861                 // If this is the last (partial) cluster, pad it out to a full refblock cluster.
862                 if refblock.len() < refcount_block_entries as usize {
863                     let refblock_padding =
864                         vec![0u16; refcount_block_entries as usize - refblock.len()];
865                     raw_file
866                         .write_refcount_block(
867                             *refblock_addr + refblock.len() as u64 * 2,
868                             &refblock_padding,
869                         )
870                         .map_err(Error::WritingHeader)?;
871                 }
872             }
873 
874             // Rewrite the top-level refcount table.
875             raw_file
876                 .write_pointer_table(header.refcount_table_offset, ref_table, 0)
877                 .map_err(Error::WritingHeader)?;
878 
879             // Rewrite the header again, now with lazy refcounts disabled.
880             header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
881             raw_file
882                 .file_mut()
883                 .seek(SeekFrom::Start(0))
884                 .map_err(Error::SeekingFile)?;
885             header.write_to(raw_file.file_mut())?;
886 
887             Ok(())
888         }
889 
890         let cluster_size = raw_file.cluster_size();
891 
892         let file_size = raw_file
893             .file_mut()
894             .metadata()
895             .map_err(Error::GettingFileSize)?
896             .len();
897 
898         let refcount_bits = 1u64 << header.refcount_order;
899         let refcount_bytes = refcount_bits.div_ceil(8);
900         let refcount_block_entries = cluster_size / refcount_bytes;
901         let pointers_per_cluster = cluster_size / size_of::<u64>() as u64;
902         let data_clusters = header.size.div_ceil(cluster_size);
903         let l2_clusters = data_clusters.div_ceil(pointers_per_cluster);
904         let l1_clusters = l2_clusters.div_ceil(cluster_size);
905         let header_clusters = (size_of::<QcowHeader>() as u64).div_ceil(cluster_size);
906         let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters;
907         let mut max_valid_cluster_index = max_clusters;
908         let refblock_clusters = max_valid_cluster_index.div_ceil(refcount_block_entries);
909         let reftable_clusters = refblock_clusters.div_ceil(pointers_per_cluster);
910         // Account for refblocks and the ref table size needed to address them.
911         let refblocks_for_refs =
912             (refblock_clusters + reftable_clusters).div_ceil(refcount_block_entries);
913         let reftable_clusters_for_refs = refblocks_for_refs.div_ceil(refcount_block_entries);
914         max_valid_cluster_index += refblock_clusters + reftable_clusters;
915         max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs;
916 
917         if max_valid_cluster_index > MAX_RAM_POINTER_TABLE_SIZE {
918             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_index));
919         }
920 
921         let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
922         if max_valid_cluster_offset < file_size - cluster_size {
923             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_offset));
924         }
925 
926         let mut refcounts = vec![0; max_valid_cluster_index as usize];
927 
928         // Find all referenced clusters and rebuild refcounts.
929         set_header_refcount(&mut refcounts, cluster_size)?;
930         set_l1_refcounts(&mut refcounts, header.clone(), cluster_size)?;
931         set_data_refcounts(&mut refcounts, header.clone(), cluster_size, raw_file)?;
932         set_refcount_table_refcounts(&mut refcounts, header.clone(), cluster_size)?;
933 
934         // Allocate clusters to store the new reference count blocks.
935         let ref_table = alloc_refblocks(
936             &mut refcounts,
937             cluster_size,
938             refblock_clusters,
939             pointers_per_cluster,
940         )?;
941 
942         // Write updated reference counts and point the reftable at them.
943         write_refblocks(
944             &refcounts,
945             header,
946             &ref_table,
947             raw_file,
948             refcount_block_entries,
949         )
950     }
951 
952     // Limits the range so that it doesn't exceed the virtual size of the file.
953     fn limit_range_file(&self, address: u64, count: usize) -> usize {
954         if address.checked_add(count as u64).is_none() || address > self.virtual_size() {
955             return 0;
956         }
957         min(count as u64, self.virtual_size() - address) as usize
958     }
959 
960     // Limits the range so that it doesn't overflow the end of a cluster.
961     fn limit_range_cluster(&self, address: u64, count: usize) -> usize {
962         let offset: u64 = self.raw_file.cluster_offset(address);
963         let limit = self.raw_file.cluster_size() - offset;
964         min(count as u64, limit) as usize
965     }
966 
967     // Gets the maximum virtual size of this image.
968     fn virtual_size(&self) -> u64 {
969         self.header.size
970     }
971 
972     // Gets the offset of `address` in the L1 table.
973     fn l1_address_offset(&self, address: u64) -> u64 {
974         let l1_index = self.l1_table_index(address);
975         l1_index * size_of::<u64>() as u64
976     }
977 
978     // Gets the index of `address` in the L1 table.
979     fn l1_table_index(&self, address: u64) -> u64 {
980         (address / self.raw_file.cluster_size()) / self.l2_entries
981     }
982 
983     // Gets the index of `address` within its L2 table.
984     fn l2_table_index(&self, address: u64) -> u64 {
985         (address / self.raw_file.cluster_size()) % self.l2_entries
986     }
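    // For example, with 64 KiB clusters there are 8192 L2 entries per table, so guest address
    // 0x1_0000_0000 (4 GiB) falls in cluster index 65536, giving l1_table_index = 8 and
    // l2_table_index = 0.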
987 
988     // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters have
989     // yet to be allocated, return None.
990     fn file_offset_read(&mut self, address: u64) -> std::io::Result<Option<u64>> {
991         if address >= self.virtual_size() {
992             return Err(std::io::Error::from_raw_os_error(EINVAL));
993         }
994 
995         let l1_index = self.l1_table_index(address) as usize;
996         let l2_addr_disk = *self
997             .l1_table
998             .get(l1_index)
999             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1000 
1001         if l2_addr_disk == 0 {
1002             // Reading from an unallocated cluster will return zeros.
1003             return Ok(None);
1004         }
1005 
1006         let l2_index = self.l2_table_index(address) as usize;
1007 
1008         if !self.l2_cache.contains_key(&l1_index) {
1009             // Not in the cache.
1010             let table =
1011                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1012 
1013             let l1_table = &self.l1_table;
1014             let raw_file = &mut self.raw_file;
1015             self.l2_cache.insert(l1_index, table, |index, evicted| {
1016                 raw_file.write_pointer_table(
1017                     l1_table[index],
1018                     evicted.get_values(),
1019                     CLUSTER_USED_FLAG,
1020                 )
1021             })?;
1022         };
1023 
1024         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1025         if cluster_addr == 0 {
1026             return Ok(None);
1027         }
1028         Ok(Some(cluster_addr + self.raw_file.cluster_offset(address)))
1029     }
1030 
1031     // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need
1032     // to be allocated, they will be.
1033     fn file_offset_write(&mut self, address: u64) -> std::io::Result<u64> {
1034         if address >= self.virtual_size() {
1035             return Err(std::io::Error::from_raw_os_error(EINVAL));
1036         }
1037 
1038         let l1_index = self.l1_table_index(address) as usize;
1039         let l2_addr_disk = *self
1040             .l1_table
1041             .get(l1_index)
1042             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1043         let l2_index = self.l2_table_index(address) as usize;
1044 
1045         let mut set_refcounts = Vec::new();
1046 
1047         if !self.l2_cache.contains_key(&l1_index) {
1048             // Not in the cache.
1049             let l2_table = if l2_addr_disk == 0 {
1050                 // Allocate a new cluster to store the L2 table and update the L1 table to point
1051                 // to the new table.
1052                 let new_addr: u64 = self.get_new_cluster(None)?;
1053                 // The cluster refcount starts at one meaning it is used but doesn't need COW.
1054                 set_refcounts.push((new_addr, 1));
1055                 self.l1_table[l1_index] = new_addr;
1056                 VecCache::new(self.l2_entries as usize)
1057             } else {
1058                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?)
1059             };
1060             let l1_table = &self.l1_table;
1061             let raw_file = &mut self.raw_file;
1062             self.l2_cache.insert(l1_index, l2_table, |index, evicted| {
1063                 raw_file.write_pointer_table(
1064                     l1_table[index],
1065                     evicted.get_values(),
1066                     CLUSTER_USED_FLAG,
1067                 )
1068             })?;
1069         }
1070 
1071         let cluster_addr = match self.l2_cache.get(&l1_index).unwrap()[l2_index] {
1072             0 => {
1073                 let initial_data = if let Some(backing) = self.backing_file.as_mut() {
1074                     let cluster_size = self.raw_file.cluster_size();
1075                     let cluster_begin = address - (address % cluster_size);
1076                     let mut cluster_data = vec![0u8; cluster_size as usize];
1077                     let volatile_slice = VolatileSlice::new(&mut cluster_data);
1078                     backing.read_exact_at_volatile(volatile_slice, cluster_begin)?;
1079                     Some(cluster_data)
1080                 } else {
1081                     None
1082                 };
1083                 // Need to allocate a data cluster
1084                 let cluster_addr = self.append_data_cluster(initial_data)?;
1085                 self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?;
1086                 cluster_addr
1087             }
1088             a => a,
1089         };
1090 
1091         for (addr, count) in set_refcounts {
1092             let mut newly_unref = self.set_cluster_refcount(addr, count)?;
1093             self.unref_clusters.append(&mut newly_unref);
1094         }
1095 
1096         Ok(cluster_addr + self.raw_file.cluster_offset(address))
1097     }
1098 
1099     // Updates the l1 and l2 tables to point to the new `cluster_addr`.
1100     fn update_cluster_addr(
1101         &mut self,
1102         l1_index: usize,
1103         l2_index: usize,
1104         cluster_addr: u64,
1105         set_refcounts: &mut Vec<(u64, u16)>,
1106     ) -> io::Result<()> {
1107         if !self.l2_cache.get(&l1_index).unwrap().dirty() {
1108             // Free the previously used cluster if one exists. Modified tables are always
1109             // written to new clusters so the L1 table can be committed to disk after they
1110             // are, and L1 never points at an invalid table.
1111             // The index must be valid from when it was inserted.
1112             let addr = self.l1_table[l1_index];
1113             if addr != 0 {
1114                 self.unref_clusters.push(addr);
1115                 set_refcounts.push((addr, 0));
1116             }
1117 
1118             // Allocate a new cluster to store the L2 table and update the L1 table to point
1119             // to the new table. The cluster will be written when the cache is flushed, no
1120             // need to copy the data now.
1121             let new_addr: u64 = self.get_new_cluster(None)?;
1122             // The cluster refcount starts at one indicating it is used but doesn't need
1123             // COW.
1124             set_refcounts.push((new_addr, 1));
1125             self.l1_table[l1_index] = new_addr;
1126         }
1127         // 'unwrap' is OK because it was just added.
1128         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = cluster_addr;
1129         Ok(())
1130     }
1131 
1132     // Allocate a new cluster and return its offset within the raw file.
1133     fn get_new_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1134         // First use a pre-allocated cluster if one is available.
1135         if let Some(free_cluster) = self.avail_clusters.pop() {
1136             if let Some(initial_data) = initial_data {
1137                 self.raw_file.write_cluster(free_cluster, initial_data)?;
1138             } else {
1139                 self.raw_file.zero_cluster(free_cluster)?;
1140             }
1141             return Ok(free_cluster);
1142         }
1143 
1144         let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset();
1145         if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? {
1146             if let Some(initial_data) = initial_data {
1147                 self.raw_file.write_cluster(new_cluster, initial_data)?;
1148             }
1149             Ok(new_cluster)
1150         } else {
1151             error!("No free clusters in get_new_cluster()");
1152             Err(std::io::Error::from_raw_os_error(ENOSPC))
1153         }
1154     }
1155 
1156     // Allocate and initialize a new data cluster. Returns the offset of the
1157     // cluster into the file on success.
1158     fn append_data_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1159         let new_addr: u64 = self.get_new_cluster(initial_data)?;
1160         // The cluster refcount starts at one indicating it is used but doesn't need COW.
1161         let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?;
1162         self.unref_clusters.append(&mut newly_unref);
1163         Ok(new_addr)
1164     }
1165 
1166     // Deallocate the storage for the cluster starting at `address`.
1167     // Any future reads of this cluster will return all zeroes (or the backing file, if in use).
1168     fn deallocate_cluster(&mut self, address: u64) -> std::io::Result<()> {
1169         if address >= self.virtual_size() {
1170             return Err(std::io::Error::from_raw_os_error(EINVAL));
1171         }
1172 
1173         let l1_index = self.l1_table_index(address) as usize;
1174         let l2_addr_disk = *self
1175             .l1_table
1176             .get(l1_index)
1177             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1178         let l2_index = self.l2_table_index(address) as usize;
1179 
1180         if l2_addr_disk == 0 {
1181             // The whole L2 table for this address is not allocated yet,
1182             // so the cluster must also be unallocated.
1183             return Ok(());
1184         }
1185 
1186         if !self.l2_cache.contains_key(&l1_index) {
1187             // Not in the cache.
1188             let table =
1189                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1190             let l1_table = &self.l1_table;
1191             let raw_file = &mut self.raw_file;
1192             self.l2_cache.insert(l1_index, table, |index, evicted| {
1193                 raw_file.write_pointer_table(
1194                     l1_table[index],
1195                     evicted.get_values(),
1196                     CLUSTER_USED_FLAG,
1197                 )
1198             })?;
1199         }
1200 
1201         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1202         if cluster_addr == 0 {
1203             // This cluster is already unallocated; nothing to do.
1204             return Ok(());
1205         }
1206 
1207         // Decrement the refcount.
1208         let refcount = self
1209             .refcounts
1210             .get_cluster_refcount(&mut self.raw_file, cluster_addr)
1211             .map_err(|_| std::io::Error::from_raw_os_error(EINVAL))?;
1212         if refcount == 0 {
1213             return Err(std::io::Error::from_raw_os_error(EINVAL));
1214         }
1215 
1216         let new_refcount = refcount - 1;
1217         let mut newly_unref = self.set_cluster_refcount(cluster_addr, new_refcount)?;
1218         self.unref_clusters.append(&mut newly_unref);
1219 
1220         // Rewrite the L2 entry to remove the cluster mapping.
1221         // unwrap is safe as we just checked/inserted this entry.
1222         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = 0;
1223 
1224         if new_refcount == 0 {
1225             let cluster_size = self.raw_file.cluster_size();
1226             // This cluster is no longer in use; deallocate the storage.
1227             // The underlying FS may not support FALLOC_FL_PUNCH_HOLE,
1228             // so don't treat an error as fatal. Future reads will return zeros anyway.
1229             let _ = self
1230                 .raw_file
1231                 .file_mut()
1232                 .punch_hole_mut(cluster_addr, cluster_size);
1233             self.unref_clusters.push(cluster_addr);
1234         }
1235         Ok(())
1236     }
1237 
1238     // Fill a range of `length` bytes starting at `address` with zeroes.
1239     // Any future reads of this range will return all zeroes.
1240     // If there is no backing file, this will deallocate cluster storage when possible.
1241     fn zero_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()> {
1242         let write_count: usize = self.limit_range_file(address, length);
1243 
1244         let mut nwritten: usize = 0;
1245         while nwritten < write_count {
1246             let curr_addr = address + nwritten as u64;
1247             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1248 
1249             if self.backing_file.is_none() && count == self.raw_file.cluster_size() as usize {
1250                 // Full cluster and no backing file in use - deallocate the storage.
1251                 self.deallocate_cluster(curr_addr)?;
1252             } else {
1253                 // Partial cluster - zero out the relevant bytes.
1254                 let offset = if self.backing_file.is_some() {
1255                     // There is a backing file, so we need to allocate a cluster in order to
1256                     // zero out the hole-punched bytes such that the backing file contents do not
1257                     // show through.
1258                     Some(self.file_offset_write(curr_addr)?)
1259                 } else {
1260                     // Any space in unallocated clusters can be left alone, since
1261                     // unallocated clusters already read back as zeroes.
1262                     self.file_offset_read(curr_addr)?
1263                 };
1264                 if let Some(offset) = offset {
1265                     // Partial cluster - zero it out.
1266                     self.raw_file
1267                         .file_mut()
1268                         .write_zeroes_all_at(offset, count)?;
1269                 }
1270             }
1271 
1272             nwritten += count;
1273         }
1274         Ok(())
1275     }
1276 
1277     // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any
1278     // cluster is compressed.
1279     fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result<Vec<u64>> {
1280         let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?;
1281         if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) {
1282             return Err(std::io::Error::from_raw_os_error(ENOTSUP));
1283         }
1284         Ok(file_values
1285             .iter()
1286             .map(|entry| *entry & L2_TABLE_OFFSET_MASK)
1287             .collect())
1288     }
1289 
1290     // Set the refcount for a cluster with the given address.
1291     // Returns a list of any refblocks that can be reused; this happens when a refblock is
1292     // moved, since its old location can then be reused.
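    // The refcount backend may report that the refblock covering `address` needs to be loaded
    // (`NeedCluster`) or newly allocated (`NeedNewCluster`); in either case the block is supplied
    // and the call is retried until the refcount is set.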
1293     fn set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result<Vec<u64>> {
1294         let mut added_clusters = Vec::new();
1295         let mut unref_clusters = Vec::new();
1296         let mut refcount_set = false;
1297         let mut new_cluster = None;
1298 
1299         while !refcount_set {
1300             match self.refcounts.set_cluster_refcount(
1301                 &mut self.raw_file,
1302                 address,
1303                 refcount,
1304                 new_cluster.take(),
1305             ) {
1306                 Ok(None) => {
1307                     refcount_set = true;
1308                 }
1309                 Ok(Some(freed_cluster)) => {
1310                     unref_clusters.push(freed_cluster);
1311                     refcount_set = true;
1312                 }
1313                 Err(refcount::Error::EvictingRefCounts(e)) => {
1314                     return Err(e);
1315                 }
1316                 Err(refcount::Error::InvalidIndex) => {
1317                     return Err(std::io::Error::from_raw_os_error(EINVAL));
1318                 }
1319                 Err(refcount::Error::NeedCluster(addr)) => {
1320                     // Read the address and call set_cluster_refcount again.
1321                     new_cluster = Some((
1322                         addr,
1323                         VecCache::from_vec(self.raw_file.read_refcount_block(addr)?),
1324                     ));
1325                 }
1326                 Err(refcount::Error::NeedNewCluster) => {
1327                     // Allocate the cluster and call set_cluster_refcount again.
1328                     let addr = self.get_new_cluster(None)?;
1329                     added_clusters.push(addr);
1330                     new_cluster = Some((
1331                         addr,
1332                         VecCache::new(self.refcounts.refcounts_per_block() as usize),
1333                     ));
1334                 }
1335                 Err(refcount::Error::ReadingRefCounts(e)) => {
1336                     return Err(e);
1337                 }
1338             }
1339         }
1340 
1341         for addr in added_clusters {
1342             self.set_cluster_refcount(addr, 1)?;
1343         }
1344         Ok(unref_clusters)
1345     }
1346 
1347     fn sync_caches(&mut self) -> std::io::Result<()> {
1348         // Write out all dirty L2 tables.
1349         for (l1_index, l2_table) in self.l2_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
1350             // The index must be valid from when we inserted it.
1351             let addr = self.l1_table[*l1_index];
1352             if addr != 0 {
1353                 self.raw_file.write_pointer_table(
1354                     addr,
1355                     l2_table.get_values(),
1356                     CLUSTER_USED_FLAG,
1357                 )?;
1358             } else {
1359                 return Err(std::io::Error::from_raw_os_error(EINVAL));
1360             }
1361             l2_table.mark_clean();
1362         }
1363         // Write the modified refcount blocks.
1364         self.refcounts.flush_blocks(&mut self.raw_file)?;
1365         // Make sure metadata (file length) and all data clusters are written.
1366         self.raw_file.file_mut().sync_all()?;
1367 
1368         // Push L1 table and refcount table last as all the clusters they point to are now
1369         // guaranteed to be valid.
1370         let mut sync_required = false;
1371         if self.l1_table.dirty() {
1372             self.raw_file.write_pointer_table(
1373                 self.header.l1_table_offset,
1374                 self.l1_table.get_values(),
1375                 0,
1376             )?;
1377             self.l1_table.mark_clean();
1378             sync_required = true;
1379         }
1380         sync_required |= self.refcounts.flush_table(&mut self.raw_file)?;
1381         if sync_required {
1382             self.raw_file.file_mut().sync_data()?;
1383         }
1384         Ok(())
1385     }
1386 
1387     // Reads `count` bytes starting at `address`, calling `cb` repeatedly with the data source,
1388     // number of bytes read so far, offset to read from, and number of bytes to read from the file
1389     // in that invocation. If `None` is passed to `cb` in place of a data source, `cb` should
1390     // treat that range as if zeros had been read.
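    // Illustrative callback sketch (using a hypothetical destination `VolatileSlice` named `out`;
    // this mirrors the callbacks passed in from `read` and `read_at_volatile` below):
    //
    //     |src, nread, offset, count| match src {
    //         Some(f) => f.read_exact_at_volatile(out.get_slice(nread, count).unwrap(), offset),
    //         None => { out.get_slice(nread, count).unwrap().write_bytes(0); Ok(()) }
    //     }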
1391     fn read_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1392     where
1393         F: FnMut(Option<&mut dyn DiskFile>, usize, u64, usize) -> std::io::Result<()>,
1394     {
1395         let read_count: usize = self.limit_range_file(address, count);
1396 
1397         let mut nread: usize = 0;
1398         while nread < read_count {
1399             let curr_addr = address + nread as u64;
1400             let file_offset = self.file_offset_read(curr_addr)?;
1401             let count = self.limit_range_cluster(curr_addr, read_count - nread);
1402 
1403             if let Some(offset) = file_offset {
1404                 cb(Some(self.raw_file.file_mut()), nread, offset, count)?;
1405             } else if let Some(backing) = self.backing_file.as_mut() {
1406                 cb(Some(backing.as_mut()), nread, curr_addr, count)?;
1407             } else {
1408                 cb(None, nread, 0, count)?;
1409             }
1410 
1411             nread += count;
1412         }
1413         Ok(read_count)
1414     }
1415 
1416     // Writes `count` bytes starting at `address`, calling `cb` repeatedly with the raw qcow
1417     // file, number of bytes written so far, raw file offset, and number of bytes to write to
1418     // the file in that invocation.
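    // Unlike `read_cb`, every cluster in the range is allocated as it is reached via
    // `file_offset_write`, so `cb` always receives a concrete offset into the raw qcow file
    // (never the backing file).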
1419     fn write_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1420     where
1421         F: FnMut(&mut File, usize, u64, usize) -> std::io::Result<()>,
1422     {
1423         let write_count: usize = self.limit_range_file(address, count);
1424 
1425         let mut nwritten: usize = 0;
1426         while nwritten < write_count {
1427             let curr_addr = address + nwritten as u64;
1428             let offset = self.file_offset_write(curr_addr)?;
1429             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1430 
1431             cb(self.raw_file.file_mut(), nwritten, offset, count)?;
1432 
1433             nwritten += count;
1434         }
1435         Ok(write_count)
1436     }
1437 }
1438 
1439 impl Drop for QcowFile {
1440     fn drop(&mut self) {
1441         let _ = self.sync_caches();
1442     }
1443 }
1444 
1445 impl AsRawDescriptors for QcowFile {
1446     fn as_raw_descriptors(&self) -> Vec<RawDescriptor> {
1447         let mut descriptors = vec![self.raw_file.file().as_raw_descriptor()];
1448         if let Some(backing) = &self.backing_file {
1449             descriptors.append(&mut backing.as_raw_descriptors());
1450         }
1451         descriptors
1452     }
1453 }
1454 
1455 impl Read for QcowFile {
1456     fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1457         let len = buf.len();
1458         let slice = VolatileSlice::new(buf);
1459         let read_count = self.read_cb(
1460             self.current_offset,
1461             len,
1462             |file, already_read, offset, count| {
1463                 let sub_slice = slice.get_slice(already_read, count).unwrap();
1464                 match file {
1465                     Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1466                     None => {
1467                         sub_slice.write_bytes(0);
1468                         Ok(())
1469                     }
1470                 }
1471             },
1472         )?;
1473         self.current_offset += read_count as u64;
1474         Ok(read_count)
1475     }
1476 }
1477 
1478 impl Seek for QcowFile {
1479     fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
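        // Compute the requested absolute offset with checked arithmetic; a `None` here means the
        // seek overflowed or went below zero, and it is rejected with EINVAL below.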
1480         let new_offset: Option<u64> = match pos {
1481             SeekFrom::Start(off) => Some(off),
1482             SeekFrom::End(off) => {
1483                 if off < 0 {
1484                     0i64.checked_sub(off)
1485                         .and_then(|increment| self.virtual_size().checked_sub(increment as u64))
1486                 } else {
1487                     self.virtual_size().checked_add(off as u64)
1488                 }
1489             }
1490             SeekFrom::Current(off) => {
1491                 if off < 0 {
1492                     0i64.checked_sub(off)
1493                         .and_then(|increment| self.current_offset.checked_sub(increment as u64))
1494                 } else {
1495                     self.current_offset.checked_add(off as u64)
1496                 }
1497             }
1498         };
1499 
1500         if let Some(o) = new_offset {
1501             if o <= self.virtual_size() {
1502                 self.current_offset = o;
1503                 return Ok(o);
1504             }
1505         }
1506         Err(std::io::Error::from_raw_os_error(EINVAL))
1507     }
1508 }
1509 
1510 impl Write for QcowFile {
1511     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1512         let write_count = self.write_cb(
1513             self.current_offset,
1514             buf.len(),
1515             |file, offset, raw_offset, count| {
1516                 file.seek(SeekFrom::Start(raw_offset))?;
1517                 file.write_all(&buf[offset..(offset + count)])
1518             },
1519         )?;
1520         self.current_offset += write_count as u64;
1521         Ok(write_count)
1522     }
1523 
1524     fn flush(&mut self) -> std::io::Result<()> {
1525         self.fsync()
1526     }
1527 }
1528 
1529 impl FileReadWriteAtVolatile for QcowFile {
1530     fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1531         self.read_cb(offset, slice.size(), |file, read, offset, count| {
1532             let sub_slice = slice.get_slice(read, count).unwrap();
1533             match file {
1534                 Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1535                 None => {
1536                     sub_slice.write_bytes(0);
1537                     Ok(())
1538                 }
1539             }
1540         })
1541     }
1542 
1543     fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1544         self.write_cb(offset, slice.size(), |file, offset, raw_offset, count| {
1545             let sub_slice = slice.get_slice(offset, count).unwrap();
1546             file.write_all_at_volatile(sub_slice, raw_offset)
1547         })
1548     }
1549 }
1550 
1551 impl FileSync for QcowFile {
1552     fn fsync(&mut self) -> std::io::Result<()> {
1553         self.sync_caches()?;
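        // Clusters freed since the last flush only become available for reuse once the metadata
        // that stopped referencing them has reached the disk, which `sync_caches` just ensured.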
1554         self.avail_clusters.append(&mut self.unref_clusters);
1555         Ok(())
1556     }
1557 
1558     fn fdatasync(&mut self) -> io::Result<()> {
1559         // QcowFile does not implement fdatasync. Just fall back to fsync.
1560         self.fsync()
1561     }
1562 }
1563 
1564 impl FileSetLen for QcowFile {
1565     fn set_len(&self, _len: u64) -> std::io::Result<()> {
1566         Err(std::io::Error::new(
1567             std::io::ErrorKind::Other,
1568             "set_len() not supported for QcowFile",
1569         ))
1570     }
1571 }
1572 
1573 impl DiskGetLen for QcowFile {
1574     fn get_len(&self) -> io::Result<u64> {
1575         Ok(self.virtual_size())
1576     }
1577 }
1578 
1579 impl FileAllocate for QcowFile {
1580     fn allocate(&mut self, offset: u64, len: u64) -> io::Result<()> {
1581         // Call write_cb with a do-nothing callback, which will have the effect
1582         // of allocating all clusters in the specified range.
1583         self.write_cb(
1584             offset,
1585             len as usize,
1586             |_file, _offset, _raw_offset, _count| Ok(()),
1587         )?;
1588         Ok(())
1589     }
1590 }
1591 
1592 impl PunchHoleMut for QcowFile {
1593     fn punch_hole_mut(&mut self, offset: u64, length: u64) -> std::io::Result<()> {
1594         let mut remaining = length;
1595         let mut offset = offset;
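        // `zero_bytes` takes a `usize` length, so split the request into usize-sized chunks;
        // this only matters on targets where usize is narrower than u64.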
1596         while remaining > 0 {
1597             let chunk_length = min(remaining, std::usize::MAX as u64) as usize;
1598             self.zero_bytes(offset, chunk_length)?;
1599             remaining -= chunk_length as u64;
1600             offset += chunk_length as u64;
1601         }
1602         Ok(())
1603     }
1604 }
1605 
1606 impl WriteZeroesAt for QcowFile {
1607     fn write_zeroes_at(&mut self, offset: u64, length: usize) -> io::Result<usize> {
1608         self.punch_hole_mut(offset, length as u64)?;
1609         Ok(length)
1610     }
1611 }
1612 
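// A minimal usage sketch (illustrative only; assumes an existing `Executor` named `ex` and a
// `QcowFile` named `qcow`):
//
//     let async_disk: Box<dyn AsyncDisk> = Box::new(qcow).to_async_disk(&ex)?;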
1613 impl ToAsyncDisk for QcowFile {
1614     fn to_async_disk(self: Box<Self>, ex: &Executor) -> crate::Result<Box<dyn AsyncDisk>> {
1615         Ok(Box::new(AsyncDiskFileWrapper::new(*self, ex)))
1616     }
1617 }
1618 
1619 // Returns an Error if the given offset doesn't align to a cluster boundary.
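// For example, with cluster_bits = 16 the low 16 bits must be zero: offset 0x2_0000 is accepted,
// while 0x2_0200 is rejected with InvalidOffset.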
1620 fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> {
1621     if offset & ((0x01 << cluster_bits) - 1) != 0 {
1622         return Err(Error::InvalidOffset(offset));
1623     }
1624     Ok(())
1625 }
1626 
1627 #[cfg(test)]
1628 mod tests {
1629     use std::fs::OpenOptions;
1630     use std::io::Read;
1631     use std::io::Seek;
1632     use std::io::SeekFrom;
1633     use std::io::Write;
1634 
1635     use tempfile::tempfile;
1636     use tempfile::TempDir;
1637 
1638     use super::*;
1639     use crate::MAX_NESTING_DEPTH;
1640 
1641     fn valid_header() -> Vec<u8> {
1642         vec![
1643             0x51u8, 0x46, 0x49, 0xfb, // magic
1644             0x00, 0x00, 0x00, 0x03, // version
1645             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1646             0x00, 0x00, 0x00, 0x00, // backing file size
1647             0x00, 0x00, 0x00, 0x10, // cluster_bits
1648             0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, // size
1649             0x00, 0x00, 0x00, 0x00, // crypt method
1650             0x00, 0x00, 0x01, 0x00, // L1 size
1651             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1652             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1653             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1654             0x00, 0x00, 0x00, 0x00, // nb snapshots
1655             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1656             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1657             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1658             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1659             0x00, 0x00, 0x00, 0x04, // refcount_order
1660             0x00, 0x00, 0x00, 0x68, // header_length
1661         ]
1662     }
1663 
1664     // Test case found by clusterfuzz that caused excessive memory allocation.
1665     fn test_huge_header() -> Vec<u8> {
1666         vec![
1667             0x51, 0x46, 0x49, 0xfb, // magic
1668             0x00, 0x00, 0x00, 0x03, // version
1669             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1670             0x00, 0x00, 0x00, 0x00, // backing file size
1671             0x00, 0x00, 0x00, 0x09, // cluster_bits
1672             0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // size
1673             0x00, 0x00, 0x00, 0x00, // crypt method
1674             0x00, 0x00, 0x01, 0x00, // L1 size
1675             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1676             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1677             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1678             0x00, 0x00, 0x00, 0x00, // nb snapshots
1679             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1680             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1681             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1682             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1683             0x00, 0x00, 0x00, 0x04, // refcount_order
1684             0x00, 0x00, 0x00, 0x68, // header_length
1685         ]
1686     }
1687 
1688     fn basic_file(header: &[u8]) -> File {
1689         let mut disk_file = tempfile().expect("failed to create temp file");
1690         disk_file.write_all(header).unwrap();
1691         disk_file.set_len(0x8000_0000).unwrap();
1692         disk_file.seek(SeekFrom::Start(0)).unwrap();
1693         disk_file
1694     }
1695 
1696     fn with_basic_file<F>(header: &[u8], mut testfn: F)
1697     where
1698         F: FnMut(File),
1699     {
1700         testfn(basic_file(header)); // File closed when the function exits.
1701     }
1702 
1703     fn with_default_file<F>(file_size: u64, mut testfn: F)
1704     where
1705         F: FnMut(QcowFile),
1706     {
1707         let file = tempfile().expect("failed to create temp file");
1708         let qcow_file = QcowFile::new(file, file_size).unwrap();
1709 
1710         testfn(qcow_file); // File closed when the function exits.
1711     }
1712 
1713     // Test helper function to convert a normal slice to a VolatileSlice and write it.
1714     fn write_all_at(qcow: &mut QcowFile, data: &[u8], offset: u64) -> std::io::Result<()> {
1715         let mut mem = data.to_owned();
1716         let vslice = VolatileSlice::new(&mut mem);
1717         qcow.write_all_at_volatile(vslice, offset)
1718     }
1719 
1720     // Test helper function to read into a VolatileSlice and copy it to a normal slice.
1721     fn read_exact_at(qcow: &mut QcowFile, data: &mut [u8], offset: u64) -> std::io::Result<()> {
1722         let mut mem = data.to_owned();
1723         let vslice = VolatileSlice::new(&mut mem);
1724         qcow.read_exact_at_volatile(vslice, offset)?;
1725         vslice.copy_to(data);
1726         Ok(())
1727     }
1728 
1729     #[test]
1730     fn default_header() {
1731         let header = QcowHeader::create_for_size_and_path(0x10_0000, None);
1732         let mut disk_file = tempfile().expect("failed to create temp file");
1733         header
1734             .expect("Failed to create header.")
1735             .write_to(&mut disk_file)
1736             .expect("Failed to write header to temp file.");
1737         disk_file.seek(SeekFrom::Start(0)).unwrap();
1738         QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1739             .expect("Failed to create Qcow from default Header");
1740     }
1741 
1742     #[test]
1743     fn header_read() {
1744         with_basic_file(&valid_header(), |mut disk_file: File| {
1745             QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
1746         });
1747     }
1748 
1749     #[test]
1750     fn header_with_backing() {
1751         let header = QcowHeader::create_for_size_and_path(0x10_0000, Some("/my/path/to/a/file"))
1752             .expect("Failed to create header.");
1753         let mut disk_file = tempfile().expect("failed to create temp file");
1754         header
1755             .write_to(&mut disk_file)
1756             .expect("Failed to write header to temp file.");
1757         disk_file.seek(SeekFrom::Start(0)).unwrap();
1758         let read_header = QcowHeader::new(&mut disk_file).expect("Failed to create header.");
1759         assert_eq!(
1760             header.backing_file_path,
1761             Some(String::from("/my/path/to/a/file"))
1762         );
1763         assert_eq!(read_header.backing_file_path, header.backing_file_path);
1764     }
1765 
1766     #[test]
1767     fn invalid_magic() {
1768         let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb];
1769         with_basic_file(&invalid_header, |mut disk_file: File| {
1770             QcowHeader::new(&mut disk_file).expect_err("Invalid header worked.");
1771         });
1772     }
1773 
1774     #[test]
1775     fn invalid_refcount_order() {
1776         let mut header = valid_header();
1777         header[99] = 2;
1778         with_basic_file(&header, |disk_file: File| {
1779             QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1780                 .expect_err("Invalid refcount order worked.");
1781         });
1782     }
1783 
1784     #[test]
1785     fn invalid_cluster_bits() {
1786         let mut header = valid_header();
1787         header[23] = 3;
1788         with_basic_file(&header, |disk_file: File| {
1789             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Invalid cluster_bits worked.");
1790         });
1791     }
1792 
1793     #[test]
1794     fn test_header_huge_file() {
1795         let header = test_huge_header();
1796         with_basic_file(&header, |disk_file: File| {
1797             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Excessive virtual size worked.");
1798         });
1799     }
1800 
1801     #[test]
1802     fn test_header_excessive_file_size_rejected() {
1803         let mut header = valid_header();
1804         header[24..32].copy_from_slice(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e]);
1805         with_basic_file(&header, |disk_file: File| {
1806             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Excessive virtual size worked.");
1807         });
1808     }
1809 
1810     #[test]
1811     fn test_huge_l1_table() {
1812         let mut header = valid_header();
1813         header[36] = 0x12;
1814         with_basic_file(&header, |disk_file: File| {
1815             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Huge L1 table worked.");
1816         });
1817     }
1818 
1819     #[test]
1820     fn test_header_1_tb_file_min_cluster() {
1821         let mut header = test_huge_header();
1822         header[24] = 0;
1823         header[26] = 1;
1824         header[31] = 0;
1825         // 1 TB with the min cluster size makes the arrays too big; it should fail.
1826         with_basic_file(&header, |disk_file: File| {
1827             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Min cluster size with 1 TB worked.");
1828         });
1829     }
1830 
1831     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1832     #[test]
1833     fn test_header_1_tb_file() {
1834         let mut header = test_huge_header();
1835         // reset to 1 TB size.
1836         header[24] = 0;
1837         header[26] = 1;
1838         header[31] = 0;
1839         // set cluster_bits
1840         header[23] = 16;
1841         with_basic_file(&header, |disk_file: File| {
1842             let mut qcow =
1843                 QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect("Failed to create file.");
1844             let value = 0x0000_0040_3f00_ffffu64;
1845             write_all_at(&mut qcow, &value.to_le_bytes(), 0x100_0000_0000 - 8)
1846                 .expect("failed to write data");
1847         });
1848     }
1849 
1850     #[test]
1851     fn test_header_huge_num_refcounts() {
1852         let mut header = valid_header();
1853         header[56..60].copy_from_slice(&[0x02, 0x00, 0xe8, 0xff]);
1854         with_basic_file(&header, |disk_file: File| {
1855             QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1856                 .expect_err("Created disk with excessive refcount clusters");
1857         });
1858     }
1859 
1860     #[test]
1861     fn test_header_huge_refcount_offset() {
1862         let mut header = valid_header();
1863         header[48..56].copy_from_slice(&[0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x02, 0x00]);
1864         with_basic_file(&header, |disk_file: File| {
1865             QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1866                 .expect_err("Created disk with excessive refcount offset");
1867         });
1868     }
1869 
1870     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1871     #[test]
1872     fn write_read_start() {
1873         with_basic_file(&valid_header(), |disk_file: File| {
1874             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1875             write_all_at(&mut q, b"test first bytes", 0).expect("Failed to write test string.");
1876             let mut buf = [0u8; 4];
1877             read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
1878             assert_eq!(&buf, b"test");
1879         });
1880     }
1881 
1882     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1883     #[test]
1884     fn write_read_start_backing() {
1885         let disk_file = basic_file(&valid_header());
1886         let mut backing = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1887         write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1888         let mut buf = [0u8; 4];
1889         let wrapping_disk_file = basic_file(&valid_header());
1890         let mut wrapping = QcowFile::from(wrapping_disk_file, MAX_NESTING_DEPTH).unwrap();
1891         wrapping.set_backing_file(Some(Box::new(backing)));
1892         read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1893         assert_eq!(&buf, b"test");
1894     }
1895 
1896     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1897     #[test]
1898     fn write_read_start_backing_overlap() {
1899         let disk_file = basic_file(&valid_header());
1900         let mut backing = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1901         write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1902         let wrapping_disk_file = basic_file(&valid_header());
1903         let mut wrapping = QcowFile::from(wrapping_disk_file, MAX_NESTING_DEPTH).unwrap();
1904         wrapping.set_backing_file(Some(Box::new(backing)));
1905         write_all_at(&mut wrapping, b"TEST", 0).expect("Failed to write second test string.");
1906         let mut buf = [0u8; 10];
1907         read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1908         assert_eq!(&buf, b"TEST first");
1909     }
1910 
1911     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1912     #[test]
1913     fn offset_write_read() {
1914         with_basic_file(&valid_header(), |disk_file: File| {
1915             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1916             let b = [0x55u8; 0x1000];
1917             write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1918             let mut buf = [0u8; 4];
1919             read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1920             assert_eq!(buf[0], 0x55);
1921         });
1922     }
1923 
1924     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1925     #[test]
1926     fn write_zeroes_read() {
1927         with_basic_file(&valid_header(), |disk_file: File| {
1928             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1929             // Write some test data.
1930             let b = [0x55u8; 0x1000];
1931             write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1932             // Overwrite the test data with zeroes.
1933             q.write_zeroes_all_at(0xfff2000, 0x200)
1934                 .expect("Failed to write zeroes.");
1935             // Verify that the correct part of the data was zeroed out.
1936             let mut buf = [0u8; 0x1000];
1937             read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1938             assert_eq!(buf[0], 0);
1939             assert_eq!(buf[0x1FF], 0);
1940             assert_eq!(buf[0x200], 0x55);
1941             assert_eq!(buf[0xFFF], 0x55);
1942         });
1943     }
1944 
1945     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1946     #[test]
1947     fn write_zeroes_full_cluster() {
1948         // Choose a size that is larger than a cluster.
1949         // valid_header uses cluster_bits = 16, which corresponds to a cluster size of 65536.
1950         const CHUNK_SIZE: usize = 65536 * 2 + 512;
1951         with_basic_file(&valid_header(), |disk_file: File| {
1952             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1953             // Write some test data.
1954             let b = [0x55u8; CHUNK_SIZE];
1955             write_all_at(&mut q, &b, 0).expect("Failed to write test string.");
1956             // Overwrite the full cluster with zeroes.
1957             q.write_zeroes_all_at(0, CHUNK_SIZE)
1958                 .expect("Failed to write zeroes.");
1959             // Verify that the data was zeroed out.
1960             let mut buf = [0u8; CHUNK_SIZE];
1961             read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
1962             assert_eq!(buf[0], 0);
1963             assert_eq!(buf[CHUNK_SIZE - 1], 0);
1964         });
1965     }
1966 
1967     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1968     #[test]
1969     fn write_zeroes_backing() {
1970         let disk_file = basic_file(&valid_header());
1971         let mut backing = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1972         // Write some test data.
1973         let b = [0x55u8; 0x1000];
1974         write_all_at(&mut backing, &b, 0xfff2000).expect("Failed to write test string.");
1975         let wrapping_disk_file = basic_file(&valid_header());
1976         let mut wrapping = QcowFile::from(wrapping_disk_file, MAX_NESTING_DEPTH).unwrap();
1977         wrapping.set_backing_file(Some(Box::new(backing)));
1978         // Overwrite the test data with zeroes.
1979         // This should allocate new clusters in the wrapping file so that they can be zeroed.
1980         wrapping
1981             .write_zeroes_all_at(0xfff2000, 0x200)
1982             .expect("Failed to write zeroes.");
1983         // Verify that the correct part of the data was zeroed out.
1984         let mut buf = [0u8; 0x1000];
1985         read_exact_at(&mut wrapping, &mut buf, 0xfff2000).expect("Failed to read.");
1986         assert_eq!(buf[0], 0);
1987         assert_eq!(buf[0x1FF], 0);
1988         assert_eq!(buf[0x200], 0x55);
1989         assert_eq!(buf[0xFFF], 0x55);
1990     }
1991     #[test]
1992     fn test_header() {
1993         with_basic_file(&valid_header(), |disk_file: File| {
1994             let q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1995             assert_eq!(q.virtual_size(), 0x20_0000_0000);
1996         });
1997     }
1998 
1999     #[test]
2000     fn read_small_buffer() {
2001         with_basic_file(&valid_header(), |disk_file: File| {
2002             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
2003             let mut b = [5u8; 16];
2004             read_exact_at(&mut q, &mut b, 1000).expect("Failed to read.");
2005             assert_eq!(0, b[0]);
2006             assert_eq!(0, b[15]);
2007         });
2008     }
2009 
2010     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2011     #[test]
2012     fn replay_ext4() {
2013         with_basic_file(&valid_header(), |disk_file: File| {
2014             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
2015             const BUF_SIZE: usize = 0x1000;
2016             let mut b = [0u8; BUF_SIZE];
2017 
2018             struct Transfer {
2019                 pub write: bool,
2020                 pub addr: u64,
2021             }
2022 
2023             // I/O transactions captured from mkfs.ext4 (reads and writes).
2024             let xfers: Vec<Transfer> = vec![
2025                 Transfer {
2026                     write: false,
2027                     addr: 0xfff0000,
2028                 },
2029                 Transfer {
2030                     write: false,
2031                     addr: 0xfffe000,
2032                 },
2033                 Transfer {
2034                     write: false,
2035                     addr: 0x0,
2036                 },
2037                 Transfer {
2038                     write: false,
2039                     addr: 0x1000,
2040                 },
2041                 Transfer {
2042                     write: false,
2043                     addr: 0xffff000,
2044                 },
2045                 Transfer {
2046                     write: false,
2047                     addr: 0xffdf000,
2048                 },
2049                 Transfer {
2050                     write: false,
2051                     addr: 0xfff8000,
2052                 },
2053                 Transfer {
2054                     write: false,
2055                     addr: 0xffe0000,
2056                 },
2057                 Transfer {
2058                     write: false,
2059                     addr: 0xffce000,
2060                 },
2061                 Transfer {
2062                     write: false,
2063                     addr: 0xffb6000,
2064                 },
2065                 Transfer {
2066                     write: false,
2067                     addr: 0xffab000,
2068                 },
2069                 Transfer {
2070                     write: false,
2071                     addr: 0xffa4000,
2072                 },
2073                 Transfer {
2074                     write: false,
2075                     addr: 0xff8e000,
2076                 },
2077                 Transfer {
2078                     write: false,
2079                     addr: 0xff86000,
2080                 },
2081                 Transfer {
2082                     write: false,
2083                     addr: 0xff84000,
2084                 },
2085                 Transfer {
2086                     write: false,
2087                     addr: 0xff89000,
2088                 },
2089                 Transfer {
2090                     write: false,
2091                     addr: 0xfe7e000,
2092                 },
2093                 Transfer {
2094                     write: false,
2095                     addr: 0x100000,
2096                 },
2097                 Transfer {
2098                     write: false,
2099                     addr: 0x3000,
2100                 },
2101                 Transfer {
2102                     write: false,
2103                     addr: 0x7000,
2104                 },
2105                 Transfer {
2106                     write: false,
2107                     addr: 0xf000,
2108                 },
2109                 Transfer {
2110                     write: false,
2111                     addr: 0x2000,
2112                 },
2113                 Transfer {
2114                     write: false,
2115                     addr: 0x4000,
2116                 },
2117                 Transfer {
2118                     write: false,
2119                     addr: 0x5000,
2120                 },
2121                 Transfer {
2122                     write: false,
2123                     addr: 0x6000,
2124                 },
2125                 Transfer {
2126                     write: false,
2127                     addr: 0x8000,
2128                 },
2129                 Transfer {
2130                     write: false,
2131                     addr: 0x9000,
2132                 },
2133                 Transfer {
2134                     write: false,
2135                     addr: 0xa000,
2136                 },
2137                 Transfer {
2138                     write: false,
2139                     addr: 0xb000,
2140                 },
2141                 Transfer {
2142                     write: false,
2143                     addr: 0xc000,
2144                 },
2145                 Transfer {
2146                     write: false,
2147                     addr: 0xd000,
2148                 },
2149                 Transfer {
2150                     write: false,
2151                     addr: 0xe000,
2152                 },
2153                 Transfer {
2154                     write: false,
2155                     addr: 0x10000,
2156                 },
2157                 Transfer {
2158                     write: false,
2159                     addr: 0x11000,
2160                 },
2161                 Transfer {
2162                     write: false,
2163                     addr: 0x12000,
2164                 },
2165                 Transfer {
2166                     write: false,
2167                     addr: 0x13000,
2168                 },
2169                 Transfer {
2170                     write: false,
2171                     addr: 0x14000,
2172                 },
2173                 Transfer {
2174                     write: false,
2175                     addr: 0x15000,
2176                 },
2177                 Transfer {
2178                     write: false,
2179                     addr: 0x16000,
2180                 },
2181                 Transfer {
2182                     write: false,
2183                     addr: 0x17000,
2184                 },
2185                 Transfer {
2186                     write: false,
2187                     addr: 0x18000,
2188                 },
2189                 Transfer {
2190                     write: false,
2191                     addr: 0x19000,
2192                 },
2193                 Transfer {
2194                     write: false,
2195                     addr: 0x1a000,
2196                 },
2197                 Transfer {
2198                     write: false,
2199                     addr: 0x1b000,
2200                 },
2201                 Transfer {
2202                     write: false,
2203                     addr: 0x1c000,
2204                 },
2205                 Transfer {
2206                     write: false,
2207                     addr: 0x1d000,
2208                 },
2209                 Transfer {
2210                     write: false,
2211                     addr: 0x1e000,
2212                 },
2213                 Transfer {
2214                     write: false,
2215                     addr: 0x1f000,
2216                 },
2217                 Transfer {
2218                     write: false,
2219                     addr: 0x21000,
2220                 },
2221                 Transfer {
2222                     write: false,
2223                     addr: 0x22000,
2224                 },
2225                 Transfer {
2226                     write: false,
2227                     addr: 0x24000,
2228                 },
2229                 Transfer {
2230                     write: false,
2231                     addr: 0x40000,
2232                 },
2233                 Transfer {
2234                     write: false,
2235                     addr: 0x0,
2236                 },
2237                 Transfer {
2238                     write: false,
2239                     addr: 0x3000,
2240                 },
2241                 Transfer {
2242                     write: false,
2243                     addr: 0x7000,
2244                 },
2245                 Transfer {
2246                     write: false,
2247                     addr: 0x0,
2248                 },
2249                 Transfer {
2250                     write: false,
2251                     addr: 0x1000,
2252                 },
2253                 Transfer {
2254                     write: false,
2255                     addr: 0x2000,
2256                 },
2257                 Transfer {
2258                     write: false,
2259                     addr: 0x3000,
2260                 },
2261                 Transfer {
2262                     write: false,
2263                     addr: 0x0,
2264                 },
2265                 Transfer {
2266                     write: false,
2267                     addr: 0x449000,
2268                 },
2269                 Transfer {
2270                     write: false,
2271                     addr: 0x48000,
2272                 },
2273                 Transfer {
2274                     write: false,
2275                     addr: 0x48000,
2276                 },
2277                 Transfer {
2278                     write: false,
2279                     addr: 0x448000,
2280                 },
2281                 Transfer {
2282                     write: false,
2283                     addr: 0x44a000,
2284                 },
2285                 Transfer {
2286                     write: false,
2287                     addr: 0x48000,
2288                 },
2289                 Transfer {
2290                     write: false,
2291                     addr: 0x48000,
2292                 },
2293                 Transfer {
2294                     write: true,
2295                     addr: 0x0,
2296                 },
2297                 Transfer {
2298                     write: true,
2299                     addr: 0x448000,
2300                 },
2301                 Transfer {
2302                     write: true,
2303                     addr: 0x449000,
2304                 },
2305                 Transfer {
2306                     write: true,
2307                     addr: 0x44a000,
2308                 },
2309                 Transfer {
2310                     write: true,
2311                     addr: 0xfff0000,
2312                 },
2313                 Transfer {
2314                     write: true,
2315                     addr: 0xfff1000,
2316                 },
2317                 Transfer {
2318                     write: true,
2319                     addr: 0xfff2000,
2320                 },
2321                 Transfer {
2322                     write: true,
2323                     addr: 0xfff3000,
2324                 },
2325                 Transfer {
2326                     write: true,
2327                     addr: 0xfff4000,
2328                 },
2329                 Transfer {
2330                     write: true,
2331                     addr: 0xfff5000,
2332                 },
2333                 Transfer {
2334                     write: true,
2335                     addr: 0xfff6000,
2336                 },
2337                 Transfer {
2338                     write: true,
2339                     addr: 0xfff7000,
2340                 },
2341                 Transfer {
2342                     write: true,
2343                     addr: 0xfff8000,
2344                 },
2345                 Transfer {
2346                     write: true,
2347                     addr: 0xfff9000,
2348                 },
2349                 Transfer {
2350                     write: true,
2351                     addr: 0xfffa000,
2352                 },
2353                 Transfer {
2354                     write: true,
2355                     addr: 0xfffb000,
2356                 },
2357                 Transfer {
2358                     write: true,
2359                     addr: 0xfffc000,
2360                 },
2361                 Transfer {
2362                     write: true,
2363                     addr: 0xfffd000,
2364                 },
2365                 Transfer {
2366                     write: true,
2367                     addr: 0xfffe000,
2368                 },
2369                 Transfer {
2370                     write: true,
2371                     addr: 0xffff000,
2372                 },
2373             ];
2374 
2375             for xfer in &xfers {
2376                 if xfer.write {
2377                     write_all_at(&mut q, &b, xfer.addr).expect("Failed to write.");
2378                 } else {
2379                     read_exact_at(&mut q, &mut b, xfer.addr).expect("Failed to read.");
2380                 }
2381             }
2382         });
2383     }
2384 
2385     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2386     #[test]
2387     fn combo_write_read() {
2388         with_default_file(1024 * 1024 * 1024 * 256, |mut qcow_file| {
2389             const NUM_BLOCKS: usize = 55;
2390             const BLOCK_SIZE: usize = 0x1_0000;
2391             const OFFSET: u64 = 0x1_0000_0020;
2392             let data = [0x55u8; BLOCK_SIZE];
2393             let mut readback = [0u8; BLOCK_SIZE];
2394             for i in 0..NUM_BLOCKS {
2395                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2396                 write_all_at(&mut qcow_file, &data, seek_offset)
2397                     .expect("Failed to write test data.");
2398                 // Read back the data to check it was written correctly.
2399                 read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2400                 for (orig, read) in data.iter().zip(readback.iter()) {
2401                     assert_eq!(orig, read);
2402                 }
2403             }
2404             // Check that address 0 is still zeros.
2405             read_exact_at(&mut qcow_file, &mut readback, 0).expect("Failed to read.");
2406             for read in readback.iter() {
2407                 assert_eq!(*read, 0);
2408             }
2409             // Check the data again after the writes have happened.
2410             for i in 0..NUM_BLOCKS {
2411                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2412                 read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2413                 for (orig, read) in data.iter().zip(readback.iter()) {
2414                     assert_eq!(orig, read);
2415                 }
2416             }
2417 
2418             assert_eq!(qcow_file.first_zero_refcount().unwrap(), None);
2419         });
2420     }
2421 
2422     #[test]
2423     fn rebuild_refcounts() {
2424         with_basic_file(&valid_header(), |mut disk_file: File| {
2425             let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
2426             let cluster_size = 65536;
2427             let mut raw_file =
2428                 QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile.");
2429             QcowFile::rebuild_refcounts(&mut raw_file, header)
2430                 .expect("Failed to rebuild refcounts.");
2431         });
2432     }
2433 
2434     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2435     #[test]
2436     fn nested_qcow() {
2437         let tmp_dir = TempDir::new().unwrap();
2438 
2439         // A file `backing` is backing a qcow file `qcow.l1`, which in turn is backing another
2440         // qcow file.
2441         let backing_file_path = tmp_dir.path().join("backing");
2442         let _backing_file = OpenOptions::new()
2443             .read(true)
2444             .write(true)
2445             .create(true)
2446             .open(&backing_file_path)
2447             .unwrap();
2448 
2449         let level1_qcow_file_path = tmp_dir.path().join("qcow.l1");
2450         let level1_qcow_file = OpenOptions::new()
2451             .read(true)
2452             .write(true)
2453             .create(true)
2454             .open(&level1_qcow_file_path)
2455             .unwrap();
2456         let _level1_qcow_file = QcowFile::new_from_backing(
2457             level1_qcow_file,
2458             backing_file_path.to_str().unwrap(),
2459             1000, /* allow deep nesting */
2460         )
2461         .unwrap();
2462 
2463         let level2_qcow_file = tempfile().unwrap();
2464         let _level2_qcow_file = QcowFile::new_from_backing(
2465             level2_qcow_file,
2466             level1_qcow_file_path.to_str().unwrap(),
2467             1000, /* allow deep nesting */
2468         )
2469         .expect("failed to create level2 qcow file");
2470     }
2471 
2472     #[test]
2473     fn io_seek() {
2474         with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2475             // Cursor should start at 0.
2476             assert_eq!(qcow_file.stream_position().unwrap(), 0);
2477 
2478             // Seek 1 MB from start.
2479             assert_eq!(
2480                 qcow_file.seek(SeekFrom::Start(1024 * 1024)).unwrap(),
2481                 1024 * 1024
2482             );
2483 
2484             // Rewind 1 MB + 1 byte (past beginning) - seeking to a negative offset is an error and
2485             // should not move the cursor.
2486             qcow_file
2487                 .seek(SeekFrom::Current(-(1024 * 1024 + 1)))
2488                 .expect_err("negative offset seek should fail");
2489             assert_eq!(qcow_file.stream_position().unwrap(), 1024 * 1024);
2490 
2491             // Seek to last byte.
2492             assert_eq!(
2493                 qcow_file.seek(SeekFrom::End(-1)).unwrap(),
2494                 1024 * 1024 * 10 - 1
2495             );
2496 
2497             // Seek to EOF.
2498             assert_eq!(qcow_file.seek(SeekFrom::End(0)).unwrap(), 1024 * 1024 * 10);
2499 
2500             // Seek past EOF is not allowed.
2501             qcow_file
2502                 .seek(SeekFrom::End(1))
2503                 .expect_err("seek past EOF should fail");
2504         });
2505     }
2506 
2507     #[test]
2508     fn io_write_read() {
2509         with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2510             const BLOCK_SIZE: usize = 0x1_0000;
2511             let data_55 = [0x55u8; BLOCK_SIZE];
2512             let data_aa = [0xaau8; BLOCK_SIZE];
2513             let mut readback = [0u8; BLOCK_SIZE];
2514 
2515             qcow_file.write_all(&data_55).unwrap();
2516             assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64);
2517 
2518             qcow_file.write_all(&data_aa).unwrap();
2519             assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64 * 2);
2520 
2521             // Read BLOCK_SIZE of just 0xaa.
2522             assert_eq!(
2523                 qcow_file
2524                     .seek(SeekFrom::Current(-(BLOCK_SIZE as i64)))
2525                     .unwrap(),
2526                 BLOCK_SIZE as u64
2527             );
2528             qcow_file.read_exact(&mut readback).unwrap();
2529             assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64 * 2);
2530             for (orig, read) in data_aa.iter().zip(readback.iter()) {
2531                 assert_eq!(orig, read);
2532             }
2533 
2534             // Read BLOCK_SIZE of just 0x55.
2535             qcow_file.rewind().unwrap();
2536             qcow_file.read_exact(&mut readback).unwrap();
2537             for (orig, read) in data_55.iter().zip(readback.iter()) {
2538                 assert_eq!(orig, read);
2539             }
2540 
2541             // Read BLOCK_SIZE crossing between the block of 0x55 and 0xaa.
2542             qcow_file
2543                 .seek(SeekFrom::Start(BLOCK_SIZE as u64 / 2))
2544                 .unwrap();
2545             qcow_file.read_exact(&mut readback).unwrap();
2546             for (orig, read) in data_55[BLOCK_SIZE / 2..]
2547                 .iter()
2548                 .chain(data_aa[..BLOCK_SIZE / 2].iter())
2549                 .zip(readback.iter())
2550             {
2551                 assert_eq!(orig, read);
2552             }
2553         });
2554     }
2555 }
2556