1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 mod qcow_raw_file;
6 mod refcount;
7 mod vec_cache;
8 
9 use std::cmp::max;
10 use std::cmp::min;
11 use std::fs::File;
12 use std::fs::OpenOptions;
13 use std::io;
14 use std::io::Read;
15 use std::io::Seek;
16 use std::io::SeekFrom;
17 use std::io::Write;
18 use std::mem::size_of;
19 use std::path::Path;
20 use std::str;
21 
22 use base::error;
23 use base::open_file;
24 use base::AsRawDescriptor;
25 use base::AsRawDescriptors;
26 use base::FileAllocate;
27 use base::FileReadWriteAtVolatile;
28 use base::FileSetLen;
29 use base::FileSync;
30 use base::RawDescriptor;
31 use base::WriteZeroesAt;
32 use cros_async::Executor;
33 use data_model::VolatileMemory;
34 use data_model::VolatileSlice;
35 use libc::EINVAL;
36 use libc::ENOSPC;
37 use libc::ENOTSUP;
38 use remain::sorted;
39 use thiserror::Error;
40 
41 use crate::create_disk_file;
42 use crate::qcow::qcow_raw_file::QcowRawFile;
43 use crate::qcow::refcount::RefCount;
44 use crate::qcow::vec_cache::CacheMap;
45 use crate::qcow::vec_cache::Cacheable;
46 use crate::qcow::vec_cache::VecCache;
47 use crate::AsyncDisk;
48 use crate::AsyncDiskFileWrapper;
49 use crate::DiskFile;
50 use crate::DiskGetLen;
51 use crate::PunchHoleMut;
52 use crate::ToAsyncDisk;
53 
54 #[sorted]
55 #[derive(Error, Debug)]
56 pub enum Error {
57     #[error("backing file io error: {0}")]
58     BackingFileIo(io::Error),
59     #[error("backing file open error: {0}")]
60     BackingFileOpen(Box<crate::Error>),
61     #[error("backing file name is too long: {0} bytes over")]
62     BackingFileTooLong(usize),
63     #[error("compressed blocks not supported")]
64     CompressedBlocksNotSupported,
65     #[error("failed to evict cache: {0}")]
66     EvictingCache(io::Error),
67     #[error("file larger than max of {}: {0}", MAX_QCOW_FILE_SIZE)]
68     FileTooBig(u64),
69     #[error("failed to get file size: {0}")]
70     GettingFileSize(io::Error),
71     #[error("failed to get refcount: {0}")]
72     GettingRefcount(refcount::Error),
73     #[error("failed to parse filename: {0}")]
74     InvalidBackingFileName(str::Utf8Error),
75     #[error("invalid cluster index")]
76     InvalidClusterIndex,
77     #[error("invalid cluster size")]
78     InvalidClusterSize,
79     #[error("invalid index")]
80     InvalidIndex,
81     #[error("invalid L1 table offset")]
82     InvalidL1TableOffset,
83     #[error("invalid L1 table size {0}")]
84     InvalidL1TableSize(u32),
85     #[error("invalid magic")]
86     InvalidMagic,
87     #[error("invalid offset")]
88     InvalidOffset(u64),
89     #[error("invalid refcount table offset")]
90     InvalidRefcountTableOffset,
91     #[error("invalid refcount table size: {0}")]
92     InvalidRefcountTableSize(u64),
93     #[error("no free clusters")]
94     NoFreeClusters,
95     #[error("no refcount clusters")]
96     NoRefcountClusters,
97     #[error("not enough space for refcounts")]
98     NotEnoughSpaceForRefcounts,
99     #[error("failed to open file: {0}")]
100     OpeningFile(io::Error),
101     #[error("failed to open file: {0}")]
102     ReadingHeader(io::Error),
103     #[error("failed to read pointers: {0}")]
104     ReadingPointers(io::Error),
105     #[error("failed to read ref count block: {0}")]
106     ReadingRefCountBlock(refcount::Error),
107     #[error("failed to read ref counts: {0}")]
108     ReadingRefCounts(io::Error),
109     #[error("failed to rebuild ref counts: {0}")]
110     RebuildingRefCounts(io::Error),
111     #[error("refcount table offset past file end")]
112     RefcountTableOffEnd,
113     #[error("too many clusters specified for refcount table")]
114     RefcountTableTooLarge,
115     #[error("failed to seek file: {0}")]
116     SeekingFile(io::Error),
117     #[error("failed to set refcount refcount: {0}")]
118     SettingRefcountRefcount(io::Error),
119     #[error("size too small for number of clusters")]
120     SizeTooSmallForNumberOfClusters,
121     #[error("l1 entry table too large: {0}")]
122     TooManyL1Entries(u64),
123     #[error("ref count table too large: {0}")]
124     TooManyRefcounts(u64),
125     #[error("unsupported refcount order")]
126     UnsupportedRefcountOrder,
127     #[error("unsupported version: {0}")]
128     UnsupportedVersion(u32),
129     #[error("failed to write header: {0}")]
130     WritingHeader(io::Error),
131 }
132 
133 pub type Result<T> = std::result::Result<T, Error>;
134 
135 // Maximum data size supported.
136 const MAX_QCOW_FILE_SIZE: u64 = 0x01 << 44; // 16 TB.
137 
138 // QCOW magic constant that starts the header.
139 pub const QCOW_MAGIC: u32 = 0x5146_49fb;
140 // Default to a cluster size of 2^DEFAULT_CLUSTER_BITS
141 const DEFAULT_CLUSTER_BITS: u32 = 16;
142 // Limit clusters to reasonable sizes. Choose the same limits as qemu. Making the clusters smaller
143 // increases the amount of overhead for bookkeeping.
144 const MIN_CLUSTER_BITS: u32 = 9;
145 const MAX_CLUSTER_BITS: u32 = 21;
146 // The L1 and refcount tables are kept in RAM, so only handle files that require fewer than 35M
147 // entries. This easily covers 1 TB files. When support for bigger files is needed, the
148 // assumptions made to keep these tables in RAM need to be thrown out.
149 const MAX_RAM_POINTER_TABLE_SIZE: u64 = 35_000_000;
150 // Only support 2-byte refcounts, i.e. 2^refcount_order bits per refcount.
151 const DEFAULT_REFCOUNT_ORDER: u32 = 4;
152 
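// The fixed v3 header fields, up to and including header_size, occupy 104 bytes.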
153 const V3_BARE_HEADER_SIZE: u32 = 104;
154 
155 // bits 0-8 and 56-63 are reserved.
156 const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
157 const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
158 // Flags
159 const COMPRESSED_FLAG: u64 = 1 << 62;
160 const CLUSTER_USED_FLAG: u64 = 1 << 63;
161 const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0;
162 
163 // The format supports a "header extension area" that crosvm does not use.
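// The 8 bytes are the 4-byte extension type and 4-byte length that `write_to` emits as zeros to
// terminate the extension area.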
164 const QCOW_EMPTY_HEADER_EXTENSION_SIZE: u32 = 8;
165 
166 // Defined by the specification
167 const MAX_BACKING_FILE_SIZE: u32 = 1023;
168 
169 /// Contains the information from the header of a qcow file.
170 #[derive(Clone, Debug)]
171 pub struct QcowHeader {
172     pub magic: u32,
173     pub version: u32,
174 
175     pub backing_file_offset: u64,
176     pub backing_file_size: u32,
177 
178     pub cluster_bits: u32,
179     pub size: u64,
180     pub crypt_method: u32,
181 
182     pub l1_size: u32,
183     pub l1_table_offset: u64,
184 
185     pub refcount_table_offset: u64,
186     pub refcount_table_clusters: u32,
187 
188     pub nb_snapshots: u32,
189     pub snapshots_offset: u64,
190 
191     // v3 entries
192     pub incompatible_features: u64,
193     pub compatible_features: u64,
194     pub autoclear_features: u64,
195     pub refcount_order: u32,
196     pub header_size: u32,
197 
198     // Post-header entries
199     pub backing_file_path: Option<String>,
200 }
201 
202 // Reads the next u16 from the file.
203 fn read_u16_from_file(mut f: &File) -> Result<u16> {
204     let mut value = [0u8; 2];
205     (&mut f)
206         .read_exact(&mut value)
207         .map_err(Error::ReadingHeader)?;
208     Ok(u16::from_be_bytes(value))
209 }
210 
211 // Reads the next u32 from the file.
212 fn read_u32_from_file(mut f: &File) -> Result<u32> {
213     let mut value = [0u8; 4];
214     (&mut f)
215         .read_exact(&mut value)
216         .map_err(Error::ReadingHeader)?;
217     Ok(u32::from_be_bytes(value))
218 }
219 
220 // Reads the next u64 from the file.
221 fn read_u64_from_file(mut f: &File) -> Result<u64> {
222     let mut value = [0u8; 8];
223     (&mut f)
224         .read_exact(&mut value)
225         .map_err(Error::ReadingHeader)?;
226     Ok(u64::from_be_bytes(value))
227 }
228 
229 impl QcowHeader {
230     /// Creates a QcowHeader from a reference to a file.
231     pub fn new(f: &mut File) -> Result<QcowHeader> {
232         f.seek(SeekFrom::Start(0)).map_err(Error::ReadingHeader)?;
233 
234         let magic = read_u32_from_file(f)?;
235         if magic != QCOW_MAGIC {
236             return Err(Error::InvalidMagic);
237         }
238 
239         let mut header = QcowHeader {
240             magic,
241             version: read_u32_from_file(f)?,
242             backing_file_offset: read_u64_from_file(f)?,
243             backing_file_size: read_u32_from_file(f)?,
244             cluster_bits: read_u32_from_file(f)?,
245             size: read_u64_from_file(f)?,
246             crypt_method: read_u32_from_file(f)?,
247             l1_size: read_u32_from_file(f)?,
248             l1_table_offset: read_u64_from_file(f)?,
249             refcount_table_offset: read_u64_from_file(f)?,
250             refcount_table_clusters: read_u32_from_file(f)?,
251             nb_snapshots: read_u32_from_file(f)?,
252             snapshots_offset: read_u64_from_file(f)?,
253             incompatible_features: read_u64_from_file(f)?,
254             compatible_features: read_u64_from_file(f)?,
255             autoclear_features: read_u64_from_file(f)?,
256             refcount_order: read_u32_from_file(f)?,
257             header_size: read_u32_from_file(f)?,
258             backing_file_path: None,
259         };
260         if header.backing_file_size > MAX_BACKING_FILE_SIZE {
261             return Err(Error::BackingFileTooLong(header.backing_file_size as usize));
262         }
263         if header.backing_file_offset != 0 {
264             f.seek(SeekFrom::Start(header.backing_file_offset))
265                 .map_err(Error::ReadingHeader)?;
266             let mut backing_file_name_bytes = vec![0u8; header.backing_file_size as usize];
267             f.read_exact(&mut backing_file_name_bytes)
268                 .map_err(Error::ReadingHeader)?;
269             header.backing_file_path = Some(
270                 String::from_utf8(backing_file_name_bytes)
271                     .map_err(|err| Error::InvalidBackingFileName(err.utf8_error()))?,
272             );
273         }
274         Ok(header)
275     }
276 
277     pub fn create_for_size_and_path(size: u64, backing_file: Option<&str>) -> Result<QcowHeader> {
278         let cluster_bits: u32 = DEFAULT_CLUSTER_BITS;
279         let cluster_size: u32 = 0x01 << cluster_bits;
280         let max_length: usize =
281             (cluster_size - V3_BARE_HEADER_SIZE - QCOW_EMPTY_HEADER_EXTENSION_SIZE) as usize;
282         if let Some(path) = backing_file {
283             if path.len() > max_length {
284                 return Err(Error::BackingFileTooLong(path.len() - max_length));
285             }
286         }
287         // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses.
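        // With the default 64 KiB clusters, each L2 table therefore holds 8192 entries and maps
        // 512 MiB of guest data.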
288         let l2_size: u32 = cluster_size / size_of::<u64>() as u32;
289         let num_clusters: u32 = div_round_up_u64(size, u64::from(cluster_size)) as u32;
290         let num_l2_clusters: u32 = div_round_up_u32(num_clusters, l2_size);
291         let l1_clusters: u32 = div_round_up_u32(num_l2_clusters, cluster_size);
292         let header_clusters = div_round_up_u32(size_of::<QcowHeader>() as u32, cluster_size);
293         Ok(QcowHeader {
294             magic: QCOW_MAGIC,
295             version: 3,
296             backing_file_offset: (if backing_file.is_none() {
297                 0
298             } else {
299                 V3_BARE_HEADER_SIZE + QCOW_EMPTY_HEADER_EXTENSION_SIZE
300             }) as u64,
301             backing_file_size: backing_file.map_or(0, |x| x.len()) as u32,
302             cluster_bits: DEFAULT_CLUSTER_BITS,
303             size,
304             crypt_method: 0,
305             l1_size: num_l2_clusters,
306             l1_table_offset: u64::from(cluster_size),
307             // The refcount table is after l1 + header.
308             refcount_table_offset: u64::from(cluster_size * (l1_clusters + 1)),
309             refcount_table_clusters: {
310                 // Pre-allocate enough clusters for the entire refcount table as it must be
311                 // continuous in the file. Allocate enough space to refcount all clusters, including
312                 // the refcount clusters.
313                 let max_refcount_clusters = max_refcount_clusters(
314                     DEFAULT_REFCOUNT_ORDER,
315                     cluster_size,
316                     num_clusters + l1_clusters + num_l2_clusters + header_clusters,
317                 ) as u32;
318                 // The refcount table needs to store the offset of each refcount cluster.
319                 div_round_up_u32(
320                     max_refcount_clusters * size_of::<u64>() as u32,
321                     cluster_size,
322                 )
323             },
324             nb_snapshots: 0,
325             snapshots_offset: 0,
326             incompatible_features: 0,
327             compatible_features: 0,
328             autoclear_features: 0,
329             refcount_order: DEFAULT_REFCOUNT_ORDER,
330             header_size: V3_BARE_HEADER_SIZE,
331             backing_file_path: backing_file.map(String::from),
332         })
333     }
334 
335     /// Write the header to `file`.
336     pub fn write_to<F: Write + Seek>(&self, file: &mut F) -> Result<()> {
337         // Writes the next u32 to the file.
338         fn write_u32_to_file<F: Write>(f: &mut F, value: u32) -> Result<()> {
339             f.write_all(&value.to_be_bytes())
340                 .map_err(Error::WritingHeader)
341         }
342 
343         // Writes the next u64 to the file.
344         fn write_u64_to_file<F: Write>(f: &mut F, value: u64) -> Result<()> {
345             f.write_all(&value.to_be_bytes())
346                 .map_err(Error::WritingHeader)
347         }
348 
349         write_u32_to_file(file, self.magic)?;
350         write_u32_to_file(file, self.version)?;
351         write_u64_to_file(file, self.backing_file_offset)?;
352         write_u32_to_file(file, self.backing_file_size)?;
353         write_u32_to_file(file, self.cluster_bits)?;
354         write_u64_to_file(file, self.size)?;
355         write_u32_to_file(file, self.crypt_method)?;
356         write_u32_to_file(file, self.l1_size)?;
357         write_u64_to_file(file, self.l1_table_offset)?;
358         write_u64_to_file(file, self.refcount_table_offset)?;
359         write_u32_to_file(file, self.refcount_table_clusters)?;
360         write_u32_to_file(file, self.nb_snapshots)?;
361         write_u64_to_file(file, self.snapshots_offset)?;
362         write_u64_to_file(file, self.incompatible_features)?;
363         write_u64_to_file(file, self.compatible_features)?;
364         write_u64_to_file(file, self.autoclear_features)?;
365         write_u32_to_file(file, self.refcount_order)?;
366         write_u32_to_file(file, self.header_size)?;
367         write_u32_to_file(file, 0)?; // header extension type: end of header extension area
368         write_u32_to_file(file, 0)?; // length of header extension data: 0
369         if let Some(backing_file_path) = self.backing_file_path.as_ref() {
370             write!(file, "{}", backing_file_path).map_err(Error::WritingHeader)?;
371         }
372 
373         // Set the file length by seeking and writing a zero to the last byte. This avoids
374         // requiring a `File` rather than anything that implements Seek as the `file` argument.
375         // The extended region, which holds the L1 and refcount table clusters, reads back as zeros.
376         let cluster_size = 0x01u64 << self.cluster_bits;
377         let refcount_blocks_size = u64::from(self.refcount_table_clusters) * cluster_size;
378         file.seek(SeekFrom::Start(
379             self.refcount_table_offset + refcount_blocks_size - 2,
380         ))
381         .map_err(Error::WritingHeader)?;
382         file.write(&[0u8]).map_err(Error::WritingHeader)?;
383 
384         Ok(())
385     }
386 }
387 
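// Returns the number of refcount blocks needed to cover `num_clusters` clusters, plus the blocks
// needed to refcount those refcount blocks themselves. For example, with the defaults
// (refcount_order = 4, i.e. 2-byte refcounts, and 64 KiB clusters), num_clusters = 262144
// (16 GiB of data) gives for_data = 8 and for_refcounts = 1, so the function returns 9.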
388 fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> u64 {
389     // Use u64 as the product of the u32 inputs can overflow.
390     let refcount_bytes = (0x01 << refcount_order as u64) / 8;
391     let for_data = div_round_up_u64(num_clusters as u64 * refcount_bytes, cluster_size as u64);
392     let for_refcounts = div_round_up_u64(for_data * refcount_bytes, cluster_size as u64);
393     for_data + for_refcounts
394 }
395 
396 /// Represents a qcow2 file. This is a sparse file format maintained by the qemu project.
397 /// Full documentation of the format can be found in the qemu repository.
398 ///
399 /// # Example
400 ///
401 /// ```
402 /// # use base::FileReadWriteAtVolatile;
403 /// # use data_model::VolatileSlice;
404 /// # use disk::QcowFile;
405 /// # fn test(file: std::fs::File) -> std::io::Result<()> {
406 ///     let mut q = QcowFile::from(file, disk::MAX_NESTING_DEPTH).expect("Can't open qcow file");
407 ///     let mut buf = [0u8; 12];
408 ///     let mut vslice = VolatileSlice::new(&mut buf);
409 ///     q.read_at_volatile(vslice, 10)?;
410 /// #   Ok(())
411 /// # }
412 /// ```
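/// Internally, the L1 table and refcount table are kept in memory, while a bounded cache of L2
/// tables is maintained; dirty L2 tables are written back to the file when they are evicted.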
413 #[derive(Debug)]
414 pub struct QcowFile {
415     raw_file: QcowRawFile,
416     header: QcowHeader,
417     l1_table: VecCache<u64>,
418     l2_entries: u64,
419     l2_cache: CacheMap<VecCache<u64>>,
420     refcounts: RefCount,
421     current_offset: u64,
422     unref_clusters: Vec<u64>, // List of freshly unreferenced clusters.
423     // List of unreferenced clusters available to be used. unref clusters become available once the
424     // removal of references to them has been synced to disk.
425     avail_clusters: Vec<u64>,
426     backing_file: Option<Box<dyn DiskFile>>,
427 }
428 
429 impl DiskFile for QcowFile {}
430 
431 impl QcowFile {
432     /// Creates a QcowFile from `file`. File must be a valid qcow2 image.
433     pub fn from(mut file: File, max_nesting_depth: u32) -> Result<QcowFile> {
434         let header = QcowHeader::new(&mut file)?;
435 
436         // Only v3 files are supported.
437         if header.version != 3 {
438             return Err(Error::UnsupportedVersion(header.version));
439         }
440 
441         // Make sure that the L1 table fits in RAM.
442         if u64::from(header.l1_size) > MAX_RAM_POINTER_TABLE_SIZE {
443             return Err(Error::InvalidL1TableSize(header.l1_size));
444         }
445 
446         let cluster_bits: u32 = header.cluster_bits;
447         if !(MIN_CLUSTER_BITS..=MAX_CLUSTER_BITS).contains(&cluster_bits) {
448             return Err(Error::InvalidClusterSize);
449         }
450         let cluster_size = 0x01u64 << cluster_bits;
451 
452         // Limit the total size of the disk.
453         if header.size > MAX_QCOW_FILE_SIZE {
454             return Err(Error::FileTooBig(header.size));
455         }
456 
457         let backing_file = if let Some(backing_file_path) = header.backing_file_path.as_ref() {
458             let path = backing_file_path.clone();
459             let backing_raw_file = open_file(
460                 Path::new(&path),
461                 OpenOptions::new().read(true), // TODO(b/190435784): Add support for O_DIRECT.
462             )
463             .map_err(|e| Error::BackingFileIo(e.into()))?;
464             // is_sparse_file is false because qcow is internally sparse and we don't need file
465             // system sparseness on top of that.
466             let backing_file = create_disk_file(
467                 backing_raw_file,
468                 /* is_sparse_file= */ false,
469                 max_nesting_depth,
470                 Path::new(&path),
471             )
472             .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
473             Some(backing_file)
474         } else {
475             None
476         };
477 
478         // Only support two byte refcounts.
479         let refcount_bits: u64 = 0x01u64
480             .checked_shl(header.refcount_order)
481             .ok_or(Error::UnsupportedRefcountOrder)?;
482         if refcount_bits != 16 {
483             return Err(Error::UnsupportedRefcountOrder);
484         }
485         let refcount_bytes = (refcount_bits + 7) / 8;
486 
487         // Need at least one refcount cluster
488         if header.refcount_table_clusters == 0 {
489             return Err(Error::NoRefcountClusters);
490         }
491         offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?;
492         offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?;
493         // The refcount table must start on a cluster boundary and be within the file's virtual or actual size.
494         offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?;
495         let file_size = file.metadata().map_err(Error::GettingFileSize)?.len();
496         if header.refcount_table_offset > max(file_size, header.size) {
497             return Err(Error::RefcountTableOffEnd);
498         }
499 
500         // The first cluster should always have a non-zero refcount, so if it is 0,
501         // this is an old file with broken refcounts, which requires a rebuild.
502         let mut refcount_rebuild_required = true;
503         file.seek(SeekFrom::Start(header.refcount_table_offset))
504             .map_err(Error::SeekingFile)?;
505         let first_refblock_addr = read_u64_from_file(&file)?;
506         if first_refblock_addr != 0 {
507             file.seek(SeekFrom::Start(first_refblock_addr))
508                 .map_err(Error::SeekingFile)?;
509             let first_cluster_refcount = read_u16_from_file(&file)?;
510             if first_cluster_refcount != 0 {
511                 refcount_rebuild_required = false;
512             }
513         }
514 
515         if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 {
516             refcount_rebuild_required = true;
517         }
518 
519         let mut raw_file =
520             QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?;
521         if refcount_rebuild_required {
522             QcowFile::rebuild_refcounts(&mut raw_file, header.clone())?;
523         }
524 
525         let l2_size = cluster_size / size_of::<u64>() as u64;
526         let num_clusters = div_round_up_u64(header.size, cluster_size);
527         let num_l2_clusters = div_round_up_u64(num_clusters, l2_size);
528         let l1_clusters = div_round_up_u64(num_l2_clusters, cluster_size);
529         let header_clusters = div_round_up_u64(size_of::<QcowHeader>() as u64, cluster_size);
530         if num_l2_clusters > MAX_RAM_POINTER_TABLE_SIZE {
531             return Err(Error::TooManyL1Entries(num_l2_clusters));
532         }
533         let l1_table = VecCache::from_vec(
534             raw_file
535                 .read_pointer_table(
536                     header.l1_table_offset,
537                     num_l2_clusters,
538                     Some(L1_TABLE_OFFSET_MASK),
539                 )
540                 .map_err(Error::ReadingHeader)?,
541         );
542 
543         let num_clusters = div_round_up_u64(header.size, cluster_size);
544         let refcount_clusters = max_refcount_clusters(
545             header.refcount_order,
546             cluster_size as u32,
547             (num_clusters + l1_clusters + num_l2_clusters + header_clusters) as u32,
548         );
549         // Check that the given header doesn't have a suspiciously sized refcount table.
550         if u64::from(header.refcount_table_clusters) > 2 * refcount_clusters {
551             return Err(Error::RefcountTableTooLarge);
552         }
553         if l1_clusters + refcount_clusters > MAX_RAM_POINTER_TABLE_SIZE {
554             return Err(Error::TooManyRefcounts(refcount_clusters));
555         }
556         let refcount_block_entries = cluster_size / refcount_bytes;
557         let refcounts = RefCount::new(
558             &mut raw_file,
559             header.refcount_table_offset,
560             refcount_clusters,
561             refcount_block_entries,
562             cluster_size,
563         )
564         .map_err(Error::ReadingRefCounts)?;
565 
566         let l2_entries = cluster_size / size_of::<u64>() as u64;
567 
568         let mut qcow = QcowFile {
569             raw_file,
570             header,
571             l1_table,
572             l2_entries,
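            // Cache at most 100 L2 tables; each table is one cluster, so this is roughly 6 MiB
            // with the default 64 KiB clusters.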
573             l2_cache: CacheMap::new(100),
574             refcounts,
575             current_offset: 0,
576             unref_clusters: Vec::new(),
577             avail_clusters: Vec::new(),
578             backing_file,
579         };
580 
581         // Check that the L1 and refcount tables fit in a 64bit address space.
582         qcow.header
583             .l1_table_offset
584             .checked_add(qcow.l1_address_offset(qcow.virtual_size()))
585             .ok_or(Error::InvalidL1TableOffset)?;
586         qcow.header
587             .refcount_table_offset
588             .checked_add(u64::from(qcow.header.refcount_table_clusters) * cluster_size)
589             .ok_or(Error::InvalidRefcountTableOffset)?;
590 
591         qcow.find_avail_clusters()?;
592 
593         Ok(qcow)
594     }
595 
596     /// Creates a new QcowFile in `file` with space for `virtual_size` bytes of guest data.
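    /// A minimal usage sketch (assumes `file` is a freshly created, writable `File`):
    ///
    /// ```
    /// # use disk::QcowFile;
    /// # fn test(file: std::fs::File) {
    ///     let _q = QcowFile::new(file, 1024 * 1024).expect("Can't create qcow file");
    /// # }
    /// ```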
597     pub fn new(file: File, virtual_size: u64) -> Result<QcowFile> {
598         let header = QcowHeader::create_for_size_and_path(virtual_size, None)?;
599         QcowFile::new_from_header(file, header, 1)
600     }
601 
602     /// Creates a new QcowFile in `file` backed by the existing image at `backing_file_name`.
603     pub fn new_from_backing(
604         file: File,
605         backing_file_name: &str,
606         backing_file_max_nesting_depth: u32,
607     ) -> Result<QcowFile> {
608         let backing_path = Path::new(backing_file_name);
609         let backing_raw_file = open_file(
610             backing_path,
611             OpenOptions::new().read(true), // TODO(b/190435784): add support for O_DIRECT.
612         )
613         .map_err(|e| Error::BackingFileIo(e.into()))?;
614         // is_sparse_file is false because qcow is internally sparse and we don't need file
615         // system sparseness on top of that.
616         let backing_file = create_disk_file(
617             backing_raw_file,
618             /* is_sparse_file= */ false,
619             backing_file_max_nesting_depth,
620             backing_path,
621         )
622         .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
623         let size = backing_file.get_len().map_err(Error::BackingFileIo)?;
624         let header = QcowHeader::create_for_size_and_path(size, Some(backing_file_name))?;
625         let mut result = QcowFile::new_from_header(file, header, backing_file_max_nesting_depth)?;
626         result.backing_file = Some(backing_file);
627         Ok(result)
628     }
629 
630     fn new_from_header(
631         mut file: File,
632         header: QcowHeader,
633         max_nesting_depth: u32,
634     ) -> Result<QcowFile> {
635         file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?;
636         header.write_to(&mut file)?;
637 
638         let mut qcow = Self::from(file, max_nesting_depth)?;
639 
640         // Set the refcount for each refcount table cluster.
641         let cluster_size = 0x01u64 << qcow.header.cluster_bits;
642         let refcount_table_base = qcow.header.refcount_table_offset as u64;
643         let end_cluster_addr =
644             refcount_table_base + u64::from(qcow.header.refcount_table_clusters) * cluster_size;
645 
646         let mut cluster_addr = 0;
647         while cluster_addr < end_cluster_addr {
648             let mut unref_clusters = qcow
649                 .set_cluster_refcount(cluster_addr, 1)
650                 .map_err(Error::SettingRefcountRefcount)?;
651             qcow.unref_clusters.append(&mut unref_clusters);
652             cluster_addr += cluster_size;
653         }
654 
655         Ok(qcow)
656     }
657 
658     pub fn set_backing_file(&mut self, backing: Option<Box<dyn DiskFile>>) {
659         self.backing_file = backing;
660     }
661 
662     /// Returns the first cluster in the file with a 0 refcount. Used for testing.
663     pub fn first_zero_refcount(&mut self) -> Result<Option<u64>> {
664         let file_size = self
665             .raw_file
666             .file_mut()
667             .metadata()
668             .map_err(Error::GettingFileSize)?
669             .len();
670         let cluster_size = 0x01u64 << self.header.cluster_bits;
671 
672         let mut cluster_addr = 0;
673         while cluster_addr < file_size {
674             let cluster_refcount = self
675                 .refcounts
676                 .get_cluster_refcount(&mut self.raw_file, cluster_addr)
677                 .map_err(Error::GettingRefcount)?;
678             if cluster_refcount == 0 {
679                 return Ok(Some(cluster_addr));
680             }
681             cluster_addr += cluster_size;
682         }
683         Ok(None)
684     }
685 
686     fn find_avail_clusters(&mut self) -> Result<()> {
687         let cluster_size = self.raw_file.cluster_size();
688 
689         let file_size = self
690             .raw_file
691             .file_mut()
692             .metadata()
693             .map_err(Error::GettingFileSize)?
694             .len();
695 
696         for i in (0..file_size).step_by(cluster_size as usize) {
697             let refcount = self
698                 .refcounts
699                 .get_cluster_refcount(&mut self.raw_file, i)
700                 .map_err(Error::GettingRefcount)?;
701             if refcount == 0 {
702                 self.avail_clusters.push(i);
703             }
704         }
705 
706         Ok(())
707     }
708 
709     /// Rebuild the reference count tables.
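    /// The rebuild walks every reachable cluster (header, L1 and L2 tables, data clusters, and
    /// the refcount table itself) to tally refcounts in memory, allocates free clusters for new
    /// refblocks, and then writes the refblocks and refcount table back out to the file.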
710     fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> {
711         fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> {
712             let idx = (cluster_address / cluster_size) as usize;
713             if idx >= refcounts.len() {
714                 return Err(Error::InvalidClusterIndex);
715             }
716             refcounts[idx] += 1;
717             Ok(())
718         }
719 
720         // Add a reference to the first cluster (header plus extensions).
721         fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> {
722             add_ref(refcounts, cluster_size, 0)
723         }
724 
725         // Add references to the L1 table clusters.
726         fn set_l1_refcounts(
727             refcounts: &mut [u16],
728             header: QcowHeader,
729             cluster_size: u64,
730         ) -> Result<()> {
731             let l1_clusters = div_round_up_u64(header.l1_size as u64, cluster_size);
732             let l1_table_offset = header.l1_table_offset;
733             for i in 0..l1_clusters {
734                 add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?;
735             }
736             Ok(())
737         }
738 
739         // Traverse the L1 and L2 tables to find all reachable data clusters.
740         fn set_data_refcounts(
741             refcounts: &mut [u16],
742             header: QcowHeader,
743             cluster_size: u64,
744             raw_file: &mut QcowRawFile,
745         ) -> Result<()> {
746             let l1_table = raw_file
747                 .read_pointer_table(
748                     header.l1_table_offset,
749                     header.l1_size as u64,
750                     Some(L1_TABLE_OFFSET_MASK),
751                 )
752                 .map_err(Error::ReadingPointers)?;
753             for l1_index in 0..header.l1_size as usize {
754                 let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
755                 if l2_addr_disk != 0 {
756                     // Add a reference to the L2 table cluster itself.
757                     add_ref(refcounts, cluster_size, l2_addr_disk)?;
758 
759                     // Read the L2 table and find all referenced data clusters.
760                     let l2_table = raw_file
761                         .read_pointer_table(
762                             l2_addr_disk,
763                             cluster_size / size_of::<u64>() as u64,
764                             Some(L2_TABLE_OFFSET_MASK),
765                         )
766                         .map_err(Error::ReadingPointers)?;
767                     for data_cluster_addr in l2_table {
768                         if data_cluster_addr != 0 {
769                             add_ref(refcounts, cluster_size, data_cluster_addr)?;
770                         }
771                     }
772                 }
773             }
774 
775             Ok(())
776         }
777 
778         // Add references to the top-level refcount table clusters.
779         fn set_refcount_table_refcounts(
780             refcounts: &mut [u16],
781             header: QcowHeader,
782             cluster_size: u64,
783         ) -> Result<()> {
784             let refcount_table_offset = header.refcount_table_offset;
785             for i in 0..header.refcount_table_clusters as u64 {
786                 add_ref(
787                     refcounts,
788                     cluster_size,
789                     refcount_table_offset + i * cluster_size,
790                 )?;
791             }
792             Ok(())
793         }
794 
795         // Allocate clusters for refblocks.
796         // This needs to be done last so that we have the correct refcounts for all other
797         // clusters.
798         fn alloc_refblocks(
799             refcounts: &mut [u16],
800             cluster_size: u64,
801             refblock_clusters: u64,
802             pointers_per_cluster: u64,
803         ) -> Result<Vec<u64>> {
804             let refcount_table_entries = div_round_up_u64(refblock_clusters, pointers_per_cluster);
805             let mut ref_table = vec![0; refcount_table_entries as usize];
806             let mut first_free_cluster: u64 = 0;
807             for refblock_addr in &mut ref_table {
808                 loop {
809                     if first_free_cluster >= refcounts.len() as u64 {
810                         return Err(Error::NotEnoughSpaceForRefcounts);
811                     }
812                     if refcounts[first_free_cluster as usize] == 0 {
813                         break;
814                     }
815                     first_free_cluster += 1;
816                 }
817 
818                 *refblock_addr = first_free_cluster * cluster_size;
819                 add_ref(refcounts, cluster_size, *refblock_addr)?;
820 
821                 first_free_cluster += 1;
822             }
823 
824             Ok(ref_table)
825         }
826 
827         // Write the updated reference count blocks and reftable.
828         fn write_refblocks(
829             refcounts: &[u16],
830             mut header: QcowHeader,
831             ref_table: &[u64],
832             raw_file: &mut QcowRawFile,
833             refcount_block_entries: u64,
834         ) -> Result<()> {
835             // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables.
836             header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
837             raw_file
838                 .file_mut()
839                 .seek(SeekFrom::Start(0))
840                 .map_err(Error::SeekingFile)?;
841             header.write_to(raw_file.file_mut())?;
842 
843             for (i, refblock_addr) in ref_table.iter().enumerate() {
844                 // Write a block of refcounts to the location indicated by refblock_addr.
845                 let refblock_start = i * (refcount_block_entries as usize);
846                 let refblock_end = min(
847                     refcounts.len(),
848                     refblock_start + refcount_block_entries as usize,
849                 );
850                 let refblock = &refcounts[refblock_start..refblock_end];
851                 raw_file
852                     .write_refcount_block(*refblock_addr, refblock)
853                     .map_err(Error::WritingHeader)?;
854 
855                 // If this is the last (partial) cluster, pad it out to a full refblock cluster.
856                 if refblock.len() < refcount_block_entries as usize {
857                     let refblock_padding =
858                         vec![0u16; refcount_block_entries as usize - refblock.len()];
859                     raw_file
860                         .write_refcount_block(
861                             *refblock_addr + refblock.len() as u64 * 2,
862                             &refblock_padding,
863                         )
864                         .map_err(Error::WritingHeader)?;
865                 }
866             }
867 
868             // Rewrite the top-level refcount table.
869             raw_file
870                 .write_pointer_table(header.refcount_table_offset, ref_table, 0)
871                 .map_err(Error::WritingHeader)?;
872 
873             // Rewrite the header again, now with lazy refcounts disabled.
874             header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
875             raw_file
876                 .file_mut()
877                 .seek(SeekFrom::Start(0))
878                 .map_err(Error::SeekingFile)?;
879             header.write_to(raw_file.file_mut())?;
880 
881             Ok(())
882         }
883 
884         let cluster_size = raw_file.cluster_size();
885 
886         let file_size = raw_file
887             .file_mut()
888             .metadata()
889             .map_err(Error::GettingFileSize)?
890             .len();
891 
892         let refcount_bits = 1u64 << header.refcount_order;
893         let refcount_bytes = div_round_up_u64(refcount_bits, 8);
894         let refcount_block_entries = cluster_size / refcount_bytes;
895         let pointers_per_cluster = cluster_size / size_of::<u64>() as u64;
896         let data_clusters = div_round_up_u64(header.size, cluster_size);
897         let l2_clusters = div_round_up_u64(data_clusters, pointers_per_cluster);
898         let l1_clusters = div_round_up_u64(l2_clusters, cluster_size);
899         let header_clusters = div_round_up_u64(size_of::<QcowHeader>() as u64, cluster_size);
900         let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters;
901         let mut max_valid_cluster_index = max_clusters;
902         let refblock_clusters = div_round_up_u64(max_valid_cluster_index, refcount_block_entries);
903         let reftable_clusters = div_round_up_u64(refblock_clusters, pointers_per_cluster);
904         // Account for refblocks and the ref table size needed to address them.
905         let refblocks_for_refs = div_round_up_u64(
906             refblock_clusters + reftable_clusters,
907             refcount_block_entries,
908         );
909         let reftable_clusters_for_refs =
910             div_round_up_u64(refblocks_for_refs, refcount_block_entries);
911         max_valid_cluster_index += refblock_clusters + reftable_clusters;
912         max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs;
913 
914         if max_valid_cluster_index > MAX_RAM_POINTER_TABLE_SIZE {
915             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_index));
916         }
917 
918         let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
919         if max_valid_cluster_offset < file_size - cluster_size {
920             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_offset));
921         }
922 
923         let mut refcounts = vec![0; max_valid_cluster_index as usize];
924 
925         // Find all referenced clusters and rebuild refcounts.
926         set_header_refcount(&mut refcounts, cluster_size)?;
927         set_l1_refcounts(&mut refcounts, header.clone(), cluster_size)?;
928         set_data_refcounts(&mut refcounts, header.clone(), cluster_size, raw_file)?;
929         set_refcount_table_refcounts(&mut refcounts, header.clone(), cluster_size)?;
930 
931         // Allocate clusters to store the new reference count blocks.
932         let ref_table = alloc_refblocks(
933             &mut refcounts,
934             cluster_size,
935             refblock_clusters,
936             pointers_per_cluster,
937         )?;
938 
939         // Write updated reference counts and point the reftable at them.
940         write_refblocks(
941             &refcounts,
942             header,
943             &ref_table,
944             raw_file,
945             refcount_block_entries,
946         )
947     }
948 
949     // Limits the range so that it doesn't exceed the virtual size of the file.
950     fn limit_range_file(&self, address: u64, count: usize) -> usize {
951         if address.checked_add(count as u64).is_none() || address > self.virtual_size() {
952             return 0;
953         }
954         min(count as u64, self.virtual_size() - address) as usize
955     }
956 
957     // Limits the range so that it doesn't overflow the end of a cluster.
958     fn limit_range_cluster(&self, address: u64, count: usize) -> usize {
959         let offset: u64 = self.raw_file.cluster_offset(address);
960         let limit = self.raw_file.cluster_size() - offset;
961         min(count as u64, limit) as usize
962     }
963 
964     // Gets the maximum virtual size of this image.
965     fn virtual_size(&self) -> u64 {
966         self.header.size
967     }
968 
969     // Gets the byte offset of `address`'s entry within the L1 table.
970     fn l1_address_offset(&self, address: u64) -> u64 {
971         let l1_index = self.l1_table_index(address);
972         l1_index * size_of::<u64>() as u64
973     }
974 
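    // A guest address is split three ways: `l1_table_index` selects an L2 table, `l2_table_index`
    // selects an entry within that table, and the raw file's `cluster_offset` gives the byte
    // offset inside the data cluster.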
975     // Gets the index of `address` in the L1 table.
976     fn l1_table_index(&self, address: u64) -> u64 {
977         (address / self.raw_file.cluster_size()) / self.l2_entries
978     }
979 
980     // Gets the index of `address` in the L2 table.
981     fn l2_table_index(&self, address: u64) -> u64 {
982         (address / self.raw_file.cluster_size()) % self.l2_entries
983     }
984 
985     // Gets the offset of the given guest address in the host file. If the L1, L2, or data cluster
986     // has yet to be allocated, returns None.
987     fn file_offset_read(&mut self, address: u64) -> std::io::Result<Option<u64>> {
988         if address >= self.virtual_size() as u64 {
989             return Err(std::io::Error::from_raw_os_error(EINVAL));
990         }
991 
992         let l1_index = self.l1_table_index(address) as usize;
993         let l2_addr_disk = *self
994             .l1_table
995             .get(l1_index)
996             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
997 
998         if l2_addr_disk == 0 {
999             // Reading from an unallocated cluster will return zeros.
1000             return Ok(None);
1001         }
1002 
1003         let l2_index = self.l2_table_index(address) as usize;
1004 
1005         if !self.l2_cache.contains_key(&l1_index) {
1006             // Not in the cache.
1007             let table =
1008                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1009 
1010             let l1_table = &self.l1_table;
1011             let raw_file = &mut self.raw_file;
1012             self.l2_cache.insert(l1_index, table, |index, evicted| {
1013                 raw_file.write_pointer_table(
1014                     l1_table[index],
1015                     evicted.get_values(),
1016                     CLUSTER_USED_FLAG,
1017                 )
1018             })?;
1019         };
1020 
1021         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1022         if cluster_addr == 0 {
1023             return Ok(None);
1024         }
1025         Ok(Some(cluster_addr + self.raw_file.cluster_offset(address)))
1026     }
1027 
1028     // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need
1029     // to be allocated, they will be.
1030     fn file_offset_write(&mut self, address: u64) -> std::io::Result<u64> {
1031         if address >= self.virtual_size() as u64 {
1032             return Err(std::io::Error::from_raw_os_error(EINVAL));
1033         }
1034 
1035         let l1_index = self.l1_table_index(address) as usize;
1036         let l2_addr_disk = *self
1037             .l1_table
1038             .get(l1_index)
1039             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1040         let l2_index = self.l2_table_index(address) as usize;
1041 
1042         let mut set_refcounts = Vec::new();
1043 
1044         if !self.l2_cache.contains_key(&l1_index) {
1045             // Not in the cache.
1046             let l2_table = if l2_addr_disk == 0 {
1047                 // Allocate a new cluster to store the L2 table and update the L1 table to point
1048                 // to the new table.
1049                 let new_addr: u64 = self.get_new_cluster(None)?;
1050                 // The cluster refcount starts at one meaning it is used but doesn't need COW.
1051                 set_refcounts.push((new_addr, 1));
1052                 self.l1_table[l1_index] = new_addr;
1053                 VecCache::new(self.l2_entries as usize)
1054             } else {
1055                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?)
1056             };
1057             let l1_table = &self.l1_table;
1058             let raw_file = &mut self.raw_file;
1059             self.l2_cache.insert(l1_index, l2_table, |index, evicted| {
1060                 raw_file.write_pointer_table(
1061                     l1_table[index],
1062                     evicted.get_values(),
1063                     CLUSTER_USED_FLAG,
1064                 )
1065             })?;
1066         }
1067 
1068         let cluster_addr = match self.l2_cache.get(&l1_index).unwrap()[l2_index] {
1069             0 => {
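                // The cluster is unallocated. If a backing file is present, seed the new cluster
                // with the backing file's contents so that copy-on-write preserves the data the
                // guest previously read through from the backing image.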
1070                 let initial_data = if let Some(backing) = self.backing_file.as_mut() {
1071                     let cluster_size = self.raw_file.cluster_size();
1072                     let cluster_begin = address - (address % cluster_size);
1073                     let mut cluster_data = vec![0u8; cluster_size as usize];
1074                     let volatile_slice = VolatileSlice::new(&mut cluster_data);
1075                     backing.read_exact_at_volatile(volatile_slice, cluster_begin)?;
1076                     Some(cluster_data)
1077                 } else {
1078                     None
1079                 };
1080                 // Need to allocate a data cluster
1081                 let cluster_addr = self.append_data_cluster(initial_data)?;
1082                 self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?;
1083                 cluster_addr
1084             }
1085             a => a,
1086         };
1087 
1088         for (addr, count) in set_refcounts {
1089             let mut newly_unref = self.set_cluster_refcount(addr, count)?;
1090             self.unref_clusters.append(&mut newly_unref);
1091         }
1092 
1093         Ok(cluster_addr + self.raw_file.cluster_offset(address))
1094     }
1095 
1096     // Updates the l1 and l2 tables to point to the new `cluster_addr`.
1097     fn update_cluster_addr(
1098         &mut self,
1099         l1_index: usize,
1100         l2_index: usize,
1101         cluster_addr: u64,
1102         set_refcounts: &mut Vec<(u64, u16)>,
1103     ) -> io::Result<()> {
1104         if !self.l2_cache.get(&l1_index).unwrap().dirty() {
1105             // Free the previously used cluster if one exists. Modified tables are always
1106             // written to new clusters so the L1 table can be committed to disk after they
1107             // are, and L1 never points at an invalid table.
1108             // The index must be valid from when it was inserted.
1109             let addr = self.l1_table[l1_index];
1110             if addr != 0 {
1111                 self.unref_clusters.push(addr);
1112                 set_refcounts.push((addr, 0));
1113             }
1114 
1115             // Allocate a new cluster to store the L2 table and update the L1 table to point
1116             // to the new table. The cluster will be written when the cache is flushed, no
1117             // need to copy the data now.
1118             let new_addr: u64 = self.get_new_cluster(None)?;
1119             // The cluster refcount starts at one indicating it is used but doesn't need
1120             // COW.
1121             set_refcounts.push((new_addr, 1));
1122             self.l1_table[l1_index] = new_addr;
1123         }
1124         // 'unwrap' is OK because it was just added.
1125         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = cluster_addr;
1126         Ok(())
1127     }
1128 
1129     // Allocate a new cluster and return its offset within the raw file.
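    // A previously freed cluster from `avail_clusters` is reused when possible; otherwise the
    // file is grown by one cluster at the end.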
1130     fn get_new_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1131         // First use a pre-allocated cluster if one is available.
1132         if let Some(free_cluster) = self.avail_clusters.pop() {
1133             if let Some(initial_data) = initial_data {
1134                 self.raw_file.write_cluster(free_cluster, initial_data)?;
1135             } else {
1136                 self.raw_file.zero_cluster(free_cluster)?;
1137             }
1138             return Ok(free_cluster);
1139         }
1140 
1141         let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset();
1142         if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? {
1143             if let Some(initial_data) = initial_data {
1144                 self.raw_file.write_cluster(new_cluster, initial_data)?;
1145             }
1146             Ok(new_cluster)
1147         } else {
1148             error!("No free clusters in get_new_cluster()");
1149             Err(std::io::Error::from_raw_os_error(ENOSPC))
1150         }
1151     }
1152 
1153     // Allocate and initialize a new data cluster. Returns the offset of the
1154     // cluster into the file on success.
1155     fn append_data_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1156         let new_addr: u64 = self.get_new_cluster(initial_data)?;
1157         // The cluster refcount starts at one indicating it is used but doesn't need COW.
1158         let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?;
1159         self.unref_clusters.append(&mut newly_unref);
1160         Ok(new_addr)
1161     }
1162 
1163     // Deallocate the storage for the cluster starting at `address`.
1164     // Any future reads of this cluster will return all zeroes (or the backing file, if in use).
1165     fn deallocate_cluster(&mut self, address: u64) -> std::io::Result<()> {
1166         if address >= self.virtual_size() as u64 {
1167             return Err(std::io::Error::from_raw_os_error(EINVAL));
1168         }
1169 
1170         let l1_index = self.l1_table_index(address) as usize;
1171         let l2_addr_disk = *self
1172             .l1_table
1173             .get(l1_index)
1174             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1175         let l2_index = self.l2_table_index(address) as usize;
1176 
1177         if l2_addr_disk == 0 {
1178             // The whole L2 table for this address is not allocated yet,
1179             // so the cluster must also be unallocated.
1180             return Ok(());
1181         }
1182 
1183         if !self.l2_cache.contains_key(&l1_index) {
1184             // Not in the cache.
1185             let table =
1186                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1187             let l1_table = &self.l1_table;
1188             let raw_file = &mut self.raw_file;
1189             self.l2_cache.insert(l1_index, table, |index, evicted| {
1190                 raw_file.write_pointer_table(
1191                     l1_table[index],
1192                     evicted.get_values(),
1193                     CLUSTER_USED_FLAG,
1194                 )
1195             })?;
1196         }
1197 
1198         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1199         if cluster_addr == 0 {
1200             // This cluster is already unallocated; nothing to do.
1201             return Ok(());
1202         }
1203 
1204         // Decrement the refcount.
1205         let refcount = self
1206             .refcounts
1207             .get_cluster_refcount(&mut self.raw_file, cluster_addr)
1208             .map_err(|_| std::io::Error::from_raw_os_error(EINVAL))?;
1209         if refcount == 0 {
1210             return Err(std::io::Error::from_raw_os_error(EINVAL));
1211         }
1212 
1213         let new_refcount = refcount - 1;
1214         let mut newly_unref = self.set_cluster_refcount(cluster_addr, new_refcount)?;
1215         self.unref_clusters.append(&mut newly_unref);
1216 
1217         // Rewrite the L2 entry to remove the cluster mapping.
1218         // unwrap is safe as we just checked/inserted this entry.
1219         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = 0;
1220 
1221         if new_refcount == 0 {
1222             let cluster_size = self.raw_file.cluster_size();
1223             // This cluster is no longer in use; deallocate the storage.
1224             // The underlying FS may not support FALLOC_FL_PUNCH_HOLE,
1225             // so don't treat an error as fatal. Future reads will return zeros anyway.
1226             let _ = self
1227                 .raw_file
1228                 .file_mut()
1229                 .punch_hole_mut(cluster_addr, cluster_size);
1230             self.unref_clusters.push(cluster_addr);
1231         }
1232         Ok(())
1233     }
1234 
1235     // Fill a range of `length` bytes starting at `address` with zeroes.
1236     // Any future reads of this range will return all zeroes.
1237     // If there is no backing file, this will deallocate cluster storage when possible.
1238     fn zero_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()> {
1239         let write_count: usize = self.limit_range_file(address, length);
1240 
1241         let mut nwritten: usize = 0;
1242         while nwritten < write_count {
1243             let curr_addr = address + nwritten as u64;
1244             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1245 
1246             if self.backing_file.is_none() && count == self.raw_file.cluster_size() as usize {
1247                 // Full cluster and no backing file in use - deallocate the storage.
1248                 self.deallocate_cluster(curr_addr)?;
1249             } else {
1250                 // Partial cluster - zero out the relevant bytes.
1251                 let offset = if self.backing_file.is_some() {
1252                     // There is a backing file, so we need to allocate a cluster in order to
1253                     // zero out the hole-punched bytes such that the backing file contents do not
1254                     // show through.
1255                     Some(self.file_offset_write(curr_addr)?)
1256                 } else {
1257                     // Any space in unallocated clusters can be left alone, since
1258                     // unallocated clusters already read back as zeroes.
1259                     self.file_offset_read(curr_addr)?
1260                 };
1261                 if let Some(offset) = offset {
1262                     // Partial cluster - zero it out.
1263                     self.raw_file
1264                         .file_mut()
1265                         .write_zeroes_all_at(offset, count)?;
1266                 }
1267             }
1268 
1269             nwritten += count;
1270         }
1271         Ok(())
1272     }
1273 
1274     // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any
1275     // cluster is compressed.
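    // Each returned entry is masked with L2_TABLE_OFFSET_MASK so that only the host cluster
    // offset remains.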
1276     fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result<Vec<u64>> {
1277         let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?;
1278         if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) {
1279             return Err(std::io::Error::from_raw_os_error(ENOTSUP));
1280         }
1281         Ok(file_values
1282             .iter()
1283             .map(|entry| *entry & L2_TABLE_OFFSET_MASK)
1284             .collect())
1285     }
1286 
1287     // Set the refcount for a cluster with the given address.
1288     // Returns a list of any refcount blocks that can be reused; this happens when a refcount
1289     // block is moved and its old location becomes free.
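    // The refcount module may ask for help before it can record the count: NeedCluster(addr)
    // means an existing refcount block must be read from disk, and NeedNewCluster means a new
    // block must be allocated; the block is handed back on the next loop iteration, and any
    // newly allocated blocks then get their own refcount set to 1.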
1290     fn set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result<Vec<u64>> {
1291         let mut added_clusters = Vec::new();
1292         let mut unref_clusters = Vec::new();
1293         let mut refcount_set = false;
1294         let mut new_cluster = None;
1295 
1296         while !refcount_set {
1297             match self.refcounts.set_cluster_refcount(
1298                 &mut self.raw_file,
1299                 address,
1300                 refcount,
1301                 new_cluster.take(),
1302             ) {
1303                 Ok(None) => {
1304                     refcount_set = true;
1305                 }
1306                 Ok(Some(freed_cluster)) => {
1307                     unref_clusters.push(freed_cluster);
1308                     refcount_set = true;
1309                 }
1310                 Err(refcount::Error::EvictingRefCounts(e)) => {
1311                     return Err(e);
1312                 }
1313                 Err(refcount::Error::InvalidIndex) => {
1314                     return Err(std::io::Error::from_raw_os_error(EINVAL));
1315                 }
1316                 Err(refcount::Error::NeedCluster(addr)) => {
1317                     // Read the address and call set_cluster_refcount again.
1318                     new_cluster = Some((
1319                         addr,
1320                         VecCache::from_vec(self.raw_file.read_refcount_block(addr)?),
1321                     ));
1322                 }
1323                 Err(refcount::Error::NeedNewCluster) => {
1324                     // Allocate the cluster and call set_cluster_refcount again.
1325                     let addr = self.get_new_cluster(None)?;
1326                     added_clusters.push(addr);
1327                     new_cluster = Some((
1328                         addr,
1329                         VecCache::new(self.refcounts.refcounts_per_block() as usize),
1330                     ));
1331                 }
1332                 Err(refcount::Error::ReadingRefCounts(e)) => {
1333                     return Err(e);
1334                 }
1335             }
1336         }
1337 
1338         for addr in added_clusters {
1339             self.set_cluster_refcount(addr, 1)?;
1340         }
1341         Ok(unref_clusters)
1342     }
1343 
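    // Writes out all dirty metadata: dirty L2 tables and refcount blocks first, then, after the
    // data has been synced, the L1 table and refcount table, so the top-level tables never point
    // at clusters that are not yet on disk.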
1344     fn sync_caches(&mut self) -> std::io::Result<()> {
1345         // Write out all dirty L2 tables.
1346         for (l1_index, l2_table) in self.l2_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
1347             // The index must be valid from when we inserted it.
1348             let addr = self.l1_table[*l1_index];
1349             if addr != 0 {
1350                 self.raw_file.write_pointer_table(
1351                     addr,
1352                     l2_table.get_values(),
1353                     CLUSTER_USED_FLAG,
1354                 )?;
1355             } else {
1356                 return Err(std::io::Error::from_raw_os_error(EINVAL));
1357             }
1358             l2_table.mark_clean();
1359         }
1360         // Write the modified refcount blocks.
1361         self.refcounts.flush_blocks(&mut self.raw_file)?;
1362         // Make sure metadata (file length) and all data clusters are written.
1363         self.raw_file.file_mut().sync_all()?;
1364 
1365         // Push L1 table and refcount table last as all the clusters they point to are now
1366         // guaranteed to be valid.
1367         let mut sync_required = false;
1368         if self.l1_table.dirty() {
1369             self.raw_file.write_pointer_table(
1370                 self.header.l1_table_offset,
1371                 self.l1_table.get_values(),
1372                 0,
1373             )?;
1374             self.l1_table.mark_clean();
1375             sync_required = true;
1376         }
1377         sync_required |= self.refcounts.flush_table(&mut self.raw_file)?;
1378         if sync_required {
1379             self.raw_file.file_mut().sync_data()?;
1380         }
1381         Ok(())
1382     }
1383 
1384     // Reads `count` bytes starting at `address`, calling `cb` repeatedly with the data source,
1385     // number of bytes read so far, offset to read from, and number of bytes to read from the file
1386     // in that invocation. If `None` is passed to `cb` in place of a file, `cb` should treat the
1387     // requested range as if zeros had been read.
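    // Three cases are handled for each chunk: data mapped in this qcow file is read from the raw
    // file at its host offset, data present only in the backing file is read from the backing
    // file at the guest offset, and unmapped data with no backing file is reported as zeros via
    // `None`.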
1388     fn read_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1389     where
1390         F: FnMut(Option<&mut dyn DiskFile>, usize, u64, usize) -> std::io::Result<()>,
1391     {
1392         let read_count: usize = self.limit_range_file(address, count);
1393 
1394         let mut nread: usize = 0;
1395         while nread < read_count {
1396             let curr_addr = address + nread as u64;
1397             let file_offset = self.file_offset_read(curr_addr)?;
1398             let count = self.limit_range_cluster(curr_addr, read_count - nread);
1399 
1400             if let Some(offset) = file_offset {
1401                 cb(Some(self.raw_file.file_mut()), nread, offset, count)?;
1402             } else if let Some(backing) = self.backing_file.as_mut() {
1403                 cb(Some(backing.as_mut()), nread, curr_addr, count)?;
1404             } else {
1405                 cb(None, nread, 0, count)?;
1406             }
1407 
1408             nread += count;
1409         }
1410         Ok(read_count)
1411     }
1412 
1413     // Writes `count` bytes starting at `address`, calling `cb` repeatedly with the raw qcow file,
1414     // number of bytes written so far, raw file offset, and number of bytes to write to the file in
1415     // that invocation.
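    // Unlike read_cb, the write path always allocates: file_offset_write() creates a cluster
    // mapping for the target address if one does not already exist.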
1416     fn write_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1417     where
1418         F: FnMut(&mut File, usize, u64, usize) -> std::io::Result<()>,
1419     {
1420         let write_count: usize = self.limit_range_file(address, count);
1421 
1422         let mut nwritten: usize = 0;
1423         while nwritten < write_count {
1424             let curr_addr = address + nwritten as u64;
1425             let offset = self.file_offset_write(curr_addr)?;
1426             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1427 
1428             cb(self.raw_file.file_mut(), nwritten, offset, count)?;
1429 
1430             nwritten += count;
1431         }
1432         Ok(write_count)
1433     }
1434 }
1435 
1436 impl Drop for QcowFile {
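    // Flush cached metadata when the file is dropped; the result is ignored because drop()
    // cannot report failures.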
1437     fn drop(&mut self) {
1438         let _ = self.sync_caches();
1439     }
1440 }
1441 
1442 impl AsRawDescriptors for QcowFile {
1443     fn as_raw_descriptors(&self) -> Vec<RawDescriptor> {
1444         let mut descriptors = vec![self.raw_file.file().as_raw_descriptor()];
1445         if let Some(backing) = &self.backing_file {
1446             descriptors.append(&mut backing.as_raw_descriptors());
1447         }
1448         descriptors
1449     }
1450 }
1451 
1452 impl Read for QcowFile {
1453     fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1454         let len = buf.len();
1455         let slice = VolatileSlice::new(buf);
1456         let read_count = self.read_cb(
1457             self.current_offset,
1458             len,
1459             |file, already_read, offset, count| {
1460                 let sub_slice = slice.get_slice(already_read, count).unwrap();
1461                 match file {
1462                     Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1463                     None => {
1464                         sub_slice.write_bytes(0);
1465                         Ok(())
1466                     }
1467                 }
1468             },
1469         )?;
1470         self.current_offset += read_count as u64;
1471         Ok(read_count)
1472     }
1473 }
1474 
1475 impl Seek for QcowFile {
1476     fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
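        // Compute the target offset with checked arithmetic so that extreme inputs (for example
        // SeekFrom::Current(i64::MIN)) produce None instead of wrapping around.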
1477         let new_offset: Option<u64> = match pos {
1478             SeekFrom::Start(off) => Some(off),
1479             SeekFrom::End(off) => {
1480                 if off < 0 {
1481                     0i64.checked_sub(off)
1482                         .and_then(|increment| self.virtual_size().checked_sub(increment as u64))
1483                 } else {
1484                     self.virtual_size().checked_add(off as u64)
1485                 }
1486             }
1487             SeekFrom::Current(off) => {
1488                 if off < 0 {
1489                     0i64.checked_sub(off)
1490                         .and_then(|increment| self.current_offset.checked_sub(increment as u64))
1491                 } else {
1492                     self.current_offset.checked_add(off as u64)
1493                 }
1494             }
1495         };
1496 
1497         if let Some(o) = new_offset {
1498             if o <= self.virtual_size() {
1499                 self.current_offset = o;
1500                 return Ok(o);
1501             }
1502         }
1503         Err(std::io::Error::from_raw_os_error(EINVAL))
1504     }
1505 }
1506 
1507 impl Write for QcowFile {
1508     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1509         let write_count = self.write_cb(
1510             self.current_offset,
1511             buf.len(),
1512             |file, offset, raw_offset, count| {
1513                 file.seek(SeekFrom::Start(raw_offset))?;
1514                 file.write_all(&buf[offset..(offset + count)])
1515             },
1516         )?;
1517         self.current_offset += write_count as u64;
1518         Ok(write_count)
1519     }
1520 
1521     fn flush(&mut self) -> std::io::Result<()> {
1522         self.fsync()
1523     }
1524 }
1525 
1526 impl FileReadWriteAtVolatile for QcowFile {
1527     fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1528         self.read_cb(offset, slice.size(), |file, read, offset, count| {
1529             let sub_slice = slice.get_slice(read, count).unwrap();
1530             match file {
1531                 Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1532                 None => {
1533                     sub_slice.write_bytes(0);
1534                     Ok(())
1535                 }
1536             }
1537         })
1538     }
1539 
1540     fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1541         self.write_cb(offset, slice.size(), |file, offset, raw_offset, count| {
1542             let sub_slice = slice.get_slice(offset, count).unwrap();
1543             file.write_all_at_volatile(sub_slice, raw_offset)
1544         })
1545     }
1546 }
1547 
1548 impl FileSync for QcowFile {
1549     fn fsync(&mut self) -> std::io::Result<()> {
1550         self.sync_caches()?;
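        // With the metadata durably written, clusters unreferenced before this sync can be
        // reused for new allocations.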
1551         self.avail_clusters.append(&mut self.unref_clusters);
1552         Ok(())
1553     }
1554 }
1555 
1556 impl FileSetLen for QcowFile {
1557     fn set_len(&self, _len: u64) -> std::io::Result<()> {
1558         Err(std::io::Error::new(
1559             std::io::ErrorKind::Other,
1560             "set_len() not supported for QcowFile",
1561         ))
1562     }
1563 }
1564 
1565 impl DiskGetLen for QcowFile {
1566     fn get_len(&self) -> io::Result<u64> {
1567         Ok(self.virtual_size())
1568     }
1569 }
1570 
1571 impl FileAllocate for QcowFile {
1572     fn allocate(&mut self, offset: u64, len: u64) -> io::Result<()> {
1573         // Call write_cb with a do-nothing callback, which will have the effect
1574         // of allocating all clusters in the specified range.
1575         self.write_cb(
1576             offset,
1577             len as usize,
1578             |_file, _offset, _raw_offset, _count| Ok(()),
1579         )?;
1580         Ok(())
1581     }
1582 }
1583 
1584 impl PunchHoleMut for QcowFile {
1585     fn punch_hole_mut(&mut self, offset: u64, length: u64) -> std::io::Result<()> {
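        // zero_bytes() takes a usize length, so split larger requests into usize-sized chunks.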
1586         let mut remaining = length;
1587         let mut offset = offset;
1588         while remaining > 0 {
1589             let chunk_length = min(remaining, std::usize::MAX as u64) as usize;
1590             self.zero_bytes(offset, chunk_length)?;
1591             remaining -= chunk_length as u64;
1592             offset += chunk_length as u64;
1593         }
1594         Ok(())
1595     }
1596 }
1597 
1598 impl WriteZeroesAt for QcowFile {
1599     fn write_zeroes_at(&mut self, offset: u64, length: usize) -> io::Result<usize> {
1600         self.punch_hole_mut(offset, length as u64)?;
1601         Ok(length)
1602     }
1603 }
1604 
1605 impl ToAsyncDisk for QcowFile {
1606     fn to_async_disk(self: Box<Self>, ex: &Executor) -> crate::Result<Box<dyn AsyncDisk>> {
1607         Ok(Box::new(AsyncDiskFileWrapper::new(*self, ex)))
1608     }
1609 }
1610 
1611 // Returns an Error if the given offset doesn't align to a cluster boundary.
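// For example, with cluster_bits = 16 an offset of 0x3_0000 is accepted, while 0x3_0200 returns
// Error::InvalidOffset.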
1612 fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> {
1613     if offset & ((0x01 << cluster_bits) - 1) != 0 {
1614         return Err(Error::InvalidOffset(offset));
1615     }
1616     Ok(())
1617 }
1618 
1619 // Ceiling of the division of `dividend`/`divisor`.
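// For example, div_round_up_u64(10, 3) == 4 and div_round_up_u64(9, 3) == 3.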
1620 fn div_round_up_u64(dividend: u64, divisor: u64) -> u64 {
1621     dividend / divisor + u64::from(dividend % divisor != 0)
1622 }
1623 
1624 // Ceiling of the division of `dividend`/`divisor`.
1625 fn div_round_up_u32(dividend: u32, divisor: u32) -> u32 {
1626     dividend / divisor + u32::from(dividend % divisor != 0)
1627 }
1628 
1629 #[cfg(test)]
1630 mod tests {
1631     use std::fs::OpenOptions;
1632     use std::io::Read;
1633     use std::io::Seek;
1634     use std::io::SeekFrom;
1635     use std::io::Write;
1636 
1637     use tempfile::tempfile;
1638     use tempfile::TempDir;
1639 
1640     use super::*;
1641     use crate::MAX_NESTING_DEPTH;
1642 
1643     fn valid_header() -> Vec<u8> {
1644         vec![
1645             0x51u8, 0x46, 0x49, 0xfb, // magic
1646             0x00, 0x00, 0x00, 0x03, // version
1647             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1648             0x00, 0x00, 0x00, 0x00, // backing file size
1649             0x00, 0x00, 0x00, 0x10, // cluster_bits
1650             0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, // size
1651             0x00, 0x00, 0x00, 0x00, // crypt method
1652             0x00, 0x00, 0x01, 0x00, // L1 size
1653             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1654             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1655             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1656             0x00, 0x00, 0x00, 0x00, // nb snapshots
1657             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1658             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1659             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1660             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1661             0x00, 0x00, 0x00, 0x04, // refcount_order
1662             0x00, 0x00, 0x00, 0x68, // header_length
1663         ]
1664     }
1665 
1666     // Test case found by clusterfuzz that caused excessive memory allocation.
1667     fn test_huge_header() -> Vec<u8> {
1668         vec![
1669             0x51, 0x46, 0x49, 0xfb, // magic
1670             0x00, 0x00, 0x00, 0x03, // version
1671             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1672             0x00, 0x00, 0x00, 0x00, // backing file size
1673             0x00, 0x00, 0x00, 0x09, // cluster_bits
1674             0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // size
1675             0x00, 0x00, 0x00, 0x00, // crypt method
1676             0x00, 0x00, 0x01, 0x00, // L1 size
1677             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1678             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1679             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1680             0x00, 0x00, 0x00, 0x00, // nb snapshots
1681             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1682             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1683             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1684             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1685             0x00, 0x00, 0x00, 0x04, // refcount_order
1686             0x00, 0x00, 0x00, 0x68, // header_length
1687         ]
1688     }
1689 
1690     fn basic_file(header: &[u8]) -> File {
1691         let mut disk_file = tempfile().expect("failed to create temp file");
1692         disk_file.write_all(header).unwrap();
1693         disk_file.set_len(0x8000_0000).unwrap();
1694         disk_file.seek(SeekFrom::Start(0)).unwrap();
1695         disk_file
1696     }
1697 
1698     fn with_basic_file<F>(header: &[u8], mut testfn: F)
1699     where
1700         F: FnMut(File),
1701     {
1702         testfn(basic_file(header)); // File closed when the function exits.
1703     }
1704 
1705     fn with_default_file<F>(file_size: u64, mut testfn: F)
1706     where
1707         F: FnMut(QcowFile),
1708     {
1709         let file = tempfile().expect("failed to create temp file");
1710         let qcow_file = QcowFile::new(file, file_size).unwrap();
1711 
1712         testfn(qcow_file); // File closed when the function exits.
1713     }
1714 
1715     // Test helper function to convert a normal slice to a VolatileSlice and write it.
1716     fn write_all_at(qcow: &mut QcowFile, data: &[u8], offset: u64) -> std::io::Result<()> {
1717         let mut mem = data.to_owned();
1718         let vslice = VolatileSlice::new(&mut mem);
1719         qcow.write_all_at_volatile(vslice, offset)
1720     }
1721 
1722     // Test helper function to read to a VolatileSlice and copy it to a normal slice.
1723     fn read_exact_at(qcow: &mut QcowFile, data: &mut [u8], offset: u64) -> std::io::Result<()> {
1724         let mut mem = data.to_owned();
1725         let vslice = VolatileSlice::new(&mut mem);
1726         qcow.read_exact_at_volatile(vslice, offset)?;
1727         vslice.copy_to(data);
1728         Ok(())
1729     }
1730 
1731     #[test]
1732     fn default_header() {
1733         let header = QcowHeader::create_for_size_and_path(0x10_0000, None);
1734         let mut disk_file = tempfile().expect("failed to create temp file");
1735         header
1736             .expect("Failed to create header.")
1737             .write_to(&mut disk_file)
1738             .expect("Failed to write header to the temp file.");
1739         disk_file.seek(SeekFrom::Start(0)).unwrap();
1740         QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1741             .expect("Failed to create Qcow from default Header");
1742     }
1743 
1744     #[test]
1745     fn header_read() {
1746         with_basic_file(&valid_header(), |mut disk_file: File| {
1747             QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
1748         });
1749     }
1750 
1751     #[test]
1752     fn header_with_backing() {
1753         let header = QcowHeader::create_for_size_and_path(0x10_0000, Some("/my/path/to/a/file"))
1754             .expect("Failed to create header.");
1755         let mut disk_file = tempfile().expect("failed to create temp file");
1756         header
1757             .write_to(&mut disk_file)
1758             .expect("Failed to write header to the temp file.");
1759         disk_file.seek(SeekFrom::Start(0)).unwrap();
1760         let read_header = QcowHeader::new(&mut disk_file).expect("Failed to create header.");
1761         assert_eq!(
1762             header.backing_file_path,
1763             Some(String::from("/my/path/to/a/file"))
1764         );
1765         assert_eq!(read_header.backing_file_path, header.backing_file_path);
1766     }
1767 
1768     #[test]
1769     fn invalid_magic() {
1770         let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb];
1771         with_basic_file(&invalid_header, |mut disk_file: File| {
1772             QcowHeader::new(&mut disk_file).expect_err("Invalid header worked.");
1773         });
1774     }
1775 
1776     #[test]
1777     fn invalid_refcount_order() {
1778         let mut header = valid_header();
1779         header[99] = 2;
1780         with_basic_file(&header, |disk_file: File| {
1781             QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1782                 .expect_err("Invalid refcount order worked.");
1783         });
1784     }
1785 
1786     #[test]
1787     fn invalid_cluster_bits() {
1788         let mut header = valid_header();
1789         header[23] = 3;
1790         with_basic_file(&header, |disk_file: File| {
1791             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Invalid cluster bits worked.");
1792         });
1793     }
1794 
1795     #[test]
1796     fn test_header_huge_file() {
1797         let header = test_huge_header();
1798         with_basic_file(&header, |disk_file: File| {
1799             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Created disk with excessive file size");
1800         });
1801     }
1802 
1803     #[test]
1804     fn test_header_excessive_file_size_rejected() {
1805         let mut header = valid_header();
1806         header[24..32].copy_from_slice(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e]);
1807         with_basic_file(&header, |disk_file: File| {
1808             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Created disk with excessive file size");
1809         });
1810     }
1811 
1812     #[test]
1813     fn test_huge_l1_table() {
1814         let mut header = valid_header();
1815         header[36] = 0x12;
1816         with_basic_file(&header, |disk_file: File| {
1817             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Created disk with excessive L1 table size");
1818         });
1819     }
1820 
1821     #[test]
1822     fn test_header_1_tb_file_min_cluster() {
1823         let mut header = test_huge_header();
1824         header[24] = 0;
1825         header[26] = 1;
1826         header[31] = 0;
1827         // 1 TB with the min cluster size makes the arrays too big, so creation should fail.
1828         with_basic_file(&header, |disk_file: File| {
1829             QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect_err("Created disk with oversized metadata arrays");
1830         });
1831     }
1832 
1833     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1834     #[test]
1835     fn test_header_1_tb_file() {
1836         let mut header = test_huge_header();
1837         // reset to 1 TB size.
1838         header[24] = 0;
1839         header[26] = 1;
1840         header[31] = 0;
1841         // set cluster_bits
1842         header[23] = 16;
1843         with_basic_file(&header, |disk_file: File| {
1844             let mut qcow =
1845                 QcowFile::from(disk_file, MAX_NESTING_DEPTH).expect("Failed to create file.");
1846             let value = 0x0000_0040_3f00_ffffu64;
1847             write_all_at(&mut qcow, &value.to_le_bytes(), 0x100_0000_0000 - 8)
1848                 .expect("failed to write data");
1849         });
1850     }
1851 
1852     #[test]
1853     fn test_header_huge_num_refcounts() {
1854         let mut header = valid_header();
1855         header[56..60].copy_from_slice(&[0x02, 0x00, 0xe8, 0xff]);
1856         with_basic_file(&header, |disk_file: File| {
1857             QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1858                 .expect_err("Created disk with excessive refcount clusters");
1859         });
1860     }
1861 
1862     #[test]
1863     fn test_header_huge_refcount_offset() {
1864         let mut header = valid_header();
1865         header[48..56].copy_from_slice(&[0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x02, 0x00]);
1866         with_basic_file(&header, |disk_file: File| {
1867             QcowFile::from(disk_file, MAX_NESTING_DEPTH)
1868                 .expect_err("Created disk with excessive refcount offset");
1869         });
1870     }
1871 
1872     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1873     #[test]
1874     fn write_read_start() {
1875         with_basic_file(&valid_header(), |disk_file: File| {
1876             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1877             write_all_at(&mut q, b"test first bytes", 0).expect("Failed to write test string.");
1878             let mut buf = [0u8; 4];
1879             read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
1880             assert_eq!(&buf, b"test");
1881         });
1882     }
1883 
1884     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1885     #[test]
1886     fn write_read_start_backing() {
1887         let disk_file = basic_file(&valid_header());
1888         let mut backing = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1889         write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1890         let mut buf = [0u8; 4];
1891         let wrapping_disk_file = basic_file(&valid_header());
1892         let mut wrapping = QcowFile::from(wrapping_disk_file, MAX_NESTING_DEPTH).unwrap();
1893         wrapping.set_backing_file(Some(Box::new(backing)));
1894         read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1895         assert_eq!(&buf, b"test");
1896     }
1897 
1898     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1899     #[test]
1900     fn write_read_start_backing_overlap() {
1901         let disk_file = basic_file(&valid_header());
1902         let mut backing = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1903         write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1904         let wrapping_disk_file = basic_file(&valid_header());
1905         let mut wrapping = QcowFile::from(wrapping_disk_file, MAX_NESTING_DEPTH).unwrap();
1906         wrapping.set_backing_file(Some(Box::new(backing)));
1907         write_all_at(&mut wrapping, b"TEST", 0).expect("Failed to write second test string.");
1908         let mut buf = [0u8; 10];
1909         read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1910         assert_eq!(&buf, b"TEST first");
1911     }
1912 
1913     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1914     #[test]
1915     fn offset_write_read() {
1916         with_basic_file(&valid_header(), |disk_file: File| {
1917             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1918             let b = [0x55u8; 0x1000];
1919             write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1920             let mut buf = [0u8; 4];
1921             read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1922             assert_eq!(buf[0], 0x55);
1923         });
1924     }
1925 
1926     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1927     #[test]
1928     fn write_zeroes_read() {
1929         with_basic_file(&valid_header(), |disk_file: File| {
1930             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1931             // Write some test data.
1932             let b = [0x55u8; 0x1000];
1933             write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1934             // Overwrite the test data with zeroes.
1935             q.write_zeroes_all_at(0xfff2000, 0x200)
1936                 .expect("Failed to write zeroes.");
1937             // Verify that the correct part of the data was zeroed out.
1938             let mut buf = [0u8; 0x1000];
1939             read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1940             assert_eq!(buf[0], 0);
1941             assert_eq!(buf[0x1FF], 0);
1942             assert_eq!(buf[0x200], 0x55);
1943             assert_eq!(buf[0xFFF], 0x55);
1944         });
1945     }
1946 
1947     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1948     #[test]
1949     fn write_zeroes_full_cluster() {
1950         // Choose a size that spans multiple 4096-byte blocks.
1951         // Note: valid_header uses cluster_bits = 16, which corresponds to a cluster size of 65536.
1952         const CHUNK_SIZE: usize = 4096 * 2 + 512;
1953         with_basic_file(&valid_header(), |disk_file: File| {
1954             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1955             // Write some test data.
1956             let b = [0x55u8; CHUNK_SIZE];
1957             write_all_at(&mut q, &b, 0).expect("Failed to write test string.");
1958             // Overwrite the whole chunk with zeroes.
1959             q.write_zeroes_all_at(0, CHUNK_SIZE)
1960                 .expect("Failed to write zeroes.");
1961             // Verify that the data was zeroed out.
1962             let mut buf = [0u8; CHUNK_SIZE];
1963             read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
1964             assert_eq!(buf[0], 0);
1965             assert_eq!(buf[CHUNK_SIZE - 1], 0);
1966         });
1967     }
1968 
1969     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1970     #[test]
1971     fn write_zeroes_backing() {
1972         let disk_file = basic_file(&valid_header());
1973         let mut backing = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1974         // Write some test data.
1975         let b = [0x55u8; 0x1000];
1976         write_all_at(&mut backing, &b, 0xfff2000).expect("Failed to write test string.");
1977         let wrapping_disk_file = basic_file(&valid_header());
1978         let mut wrapping = QcowFile::from(wrapping_disk_file, MAX_NESTING_DEPTH).unwrap();
1979         wrapping.set_backing_file(Some(Box::new(backing)));
1980         // Overwrite the test data with zeroes.
1981         // This should allocate new clusters in the wrapping file so that they can be zeroed.
1982         wrapping
1983             .write_zeroes_all_at(0xfff2000, 0x200)
1984             .expect("Failed to write zeroes.");
1985         // Verify that the correct part of the data was zeroed out.
1986         let mut buf = [0u8; 0x1000];
1987         read_exact_at(&mut wrapping, &mut buf, 0xfff2000).expect("Failed to read.");
1988         assert_eq!(buf[0], 0);
1989         assert_eq!(buf[0x1FF], 0);
1990         assert_eq!(buf[0x200], 0x55);
1991         assert_eq!(buf[0xFFF], 0x55);
1992     }
1993     #[test]
1994     fn test_header() {
1995         with_basic_file(&valid_header(), |disk_file: File| {
1996             let q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
1997             assert_eq!(q.virtual_size(), 0x20_0000_0000);
1998         });
1999     }
2000 
2001     #[test]
2002     fn read_small_buffer() {
2003         with_basic_file(&valid_header(), |disk_file: File| {
2004             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
2005             let mut b = [5u8; 16];
2006             read_exact_at(&mut q, &mut b, 1000).expect("Failed to read.");
2007             assert_eq!(0, b[0]);
2008             assert_eq!(0, b[15]);
2009         });
2010     }
2011 
2012     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2013     #[test]
2014     fn replay_ext4() {
2015         with_basic_file(&valid_header(), |disk_file: File| {
2016             let mut q = QcowFile::from(disk_file, MAX_NESTING_DEPTH).unwrap();
2017             const BUF_SIZE: usize = 0x1000;
2018             let mut b = [0u8; BUF_SIZE];
2019 
2020             struct Transfer {
2021                 pub write: bool,
2022                 pub addr: u64,
2023             }
2024 
2025             // Read and write transactions captured from mkfs.ext4.
2026             let xfers: Vec<Transfer> = vec![
2027                 Transfer {
2028                     write: false,
2029                     addr: 0xfff0000,
2030                 },
2031                 Transfer {
2032                     write: false,
2033                     addr: 0xfffe000,
2034                 },
2035                 Transfer {
2036                     write: false,
2037                     addr: 0x0,
2038                 },
2039                 Transfer {
2040                     write: false,
2041                     addr: 0x1000,
2042                 },
2043                 Transfer {
2044                     write: false,
2045                     addr: 0xffff000,
2046                 },
2047                 Transfer {
2048                     write: false,
2049                     addr: 0xffdf000,
2050                 },
2051                 Transfer {
2052                     write: false,
2053                     addr: 0xfff8000,
2054                 },
2055                 Transfer {
2056                     write: false,
2057                     addr: 0xffe0000,
2058                 },
2059                 Transfer {
2060                     write: false,
2061                     addr: 0xffce000,
2062                 },
2063                 Transfer {
2064                     write: false,
2065                     addr: 0xffb6000,
2066                 },
2067                 Transfer {
2068                     write: false,
2069                     addr: 0xffab000,
2070                 },
2071                 Transfer {
2072                     write: false,
2073                     addr: 0xffa4000,
2074                 },
2075                 Transfer {
2076                     write: false,
2077                     addr: 0xff8e000,
2078                 },
2079                 Transfer {
2080                     write: false,
2081                     addr: 0xff86000,
2082                 },
2083                 Transfer {
2084                     write: false,
2085                     addr: 0xff84000,
2086                 },
2087                 Transfer {
2088                     write: false,
2089                     addr: 0xff89000,
2090                 },
2091                 Transfer {
2092                     write: false,
2093                     addr: 0xfe7e000,
2094                 },
2095                 Transfer {
2096                     write: false,
2097                     addr: 0x100000,
2098                 },
2099                 Transfer {
2100                     write: false,
2101                     addr: 0x3000,
2102                 },
2103                 Transfer {
2104                     write: false,
2105                     addr: 0x7000,
2106                 },
2107                 Transfer {
2108                     write: false,
2109                     addr: 0xf000,
2110                 },
2111                 Transfer {
2112                     write: false,
2113                     addr: 0x2000,
2114                 },
2115                 Transfer {
2116                     write: false,
2117                     addr: 0x4000,
2118                 },
2119                 Transfer {
2120                     write: false,
2121                     addr: 0x5000,
2122                 },
2123                 Transfer {
2124                     write: false,
2125                     addr: 0x6000,
2126                 },
2127                 Transfer {
2128                     write: false,
2129                     addr: 0x8000,
2130                 },
2131                 Transfer {
2132                     write: false,
2133                     addr: 0x9000,
2134                 },
2135                 Transfer {
2136                     write: false,
2137                     addr: 0xa000,
2138                 },
2139                 Transfer {
2140                     write: false,
2141                     addr: 0xb000,
2142                 },
2143                 Transfer {
2144                     write: false,
2145                     addr: 0xc000,
2146                 },
2147                 Transfer {
2148                     write: false,
2149                     addr: 0xd000,
2150                 },
2151                 Transfer {
2152                     write: false,
2153                     addr: 0xe000,
2154                 },
2155                 Transfer {
2156                     write: false,
2157                     addr: 0x10000,
2158                 },
2159                 Transfer {
2160                     write: false,
2161                     addr: 0x11000,
2162                 },
2163                 Transfer {
2164                     write: false,
2165                     addr: 0x12000,
2166                 },
2167                 Transfer {
2168                     write: false,
2169                     addr: 0x13000,
2170                 },
2171                 Transfer {
2172                     write: false,
2173                     addr: 0x14000,
2174                 },
2175                 Transfer {
2176                     write: false,
2177                     addr: 0x15000,
2178                 },
2179                 Transfer {
2180                     write: false,
2181                     addr: 0x16000,
2182                 },
2183                 Transfer {
2184                     write: false,
2185                     addr: 0x17000,
2186                 },
2187                 Transfer {
2188                     write: false,
2189                     addr: 0x18000,
2190                 },
2191                 Transfer {
2192                     write: false,
2193                     addr: 0x19000,
2194                 },
2195                 Transfer {
2196                     write: false,
2197                     addr: 0x1a000,
2198                 },
2199                 Transfer {
2200                     write: false,
2201                     addr: 0x1b000,
2202                 },
2203                 Transfer {
2204                     write: false,
2205                     addr: 0x1c000,
2206                 },
2207                 Transfer {
2208                     write: false,
2209                     addr: 0x1d000,
2210                 },
2211                 Transfer {
2212                     write: false,
2213                     addr: 0x1e000,
2214                 },
2215                 Transfer {
2216                     write: false,
2217                     addr: 0x1f000,
2218                 },
2219                 Transfer {
2220                     write: false,
2221                     addr: 0x21000,
2222                 },
2223                 Transfer {
2224                     write: false,
2225                     addr: 0x22000,
2226                 },
2227                 Transfer {
2228                     write: false,
2229                     addr: 0x24000,
2230                 },
2231                 Transfer {
2232                     write: false,
2233                     addr: 0x40000,
2234                 },
2235                 Transfer {
2236                     write: false,
2237                     addr: 0x0,
2238                 },
2239                 Transfer {
2240                     write: false,
2241                     addr: 0x3000,
2242                 },
2243                 Transfer {
2244                     write: false,
2245                     addr: 0x7000,
2246                 },
2247                 Transfer {
2248                     write: false,
2249                     addr: 0x0,
2250                 },
2251                 Transfer {
2252                     write: false,
2253                     addr: 0x1000,
2254                 },
2255                 Transfer {
2256                     write: false,
2257                     addr: 0x2000,
2258                 },
2259                 Transfer {
2260                     write: false,
2261                     addr: 0x3000,
2262                 },
2263                 Transfer {
2264                     write: false,
2265                     addr: 0x0,
2266                 },
2267                 Transfer {
2268                     write: false,
2269                     addr: 0x449000,
2270                 },
2271                 Transfer {
2272                     write: false,
2273                     addr: 0x48000,
2274                 },
2275                 Transfer {
2276                     write: false,
2277                     addr: 0x48000,
2278                 },
2279                 Transfer {
2280                     write: false,
2281                     addr: 0x448000,
2282                 },
2283                 Transfer {
2284                     write: false,
2285                     addr: 0x44a000,
2286                 },
2287                 Transfer {
2288                     write: false,
2289                     addr: 0x48000,
2290                 },
2291                 Transfer {
2292                     write: false,
2293                     addr: 0x48000,
2294                 },
2295                 Transfer {
2296                     write: true,
2297                     addr: 0x0,
2298                 },
2299                 Transfer {
2300                     write: true,
2301                     addr: 0x448000,
2302                 },
2303                 Transfer {
2304                     write: true,
2305                     addr: 0x449000,
2306                 },
2307                 Transfer {
2308                     write: true,
2309                     addr: 0x44a000,
2310                 },
2311                 Transfer {
2312                     write: true,
2313                     addr: 0xfff0000,
2314                 },
2315                 Transfer {
2316                     write: true,
2317                     addr: 0xfff1000,
2318                 },
2319                 Transfer {
2320                     write: true,
2321                     addr: 0xfff2000,
2322                 },
2323                 Transfer {
2324                     write: true,
2325                     addr: 0xfff3000,
2326                 },
2327                 Transfer {
2328                     write: true,
2329                     addr: 0xfff4000,
2330                 },
2331                 Transfer {
2332                     write: true,
2333                     addr: 0xfff5000,
2334                 },
2335                 Transfer {
2336                     write: true,
2337                     addr: 0xfff6000,
2338                 },
2339                 Transfer {
2340                     write: true,
2341                     addr: 0xfff7000,
2342                 },
2343                 Transfer {
2344                     write: true,
2345                     addr: 0xfff8000,
2346                 },
2347                 Transfer {
2348                     write: true,
2349                     addr: 0xfff9000,
2350                 },
2351                 Transfer {
2352                     write: true,
2353                     addr: 0xfffa000,
2354                 },
2355                 Transfer {
2356                     write: true,
2357                     addr: 0xfffb000,
2358                 },
2359                 Transfer {
2360                     write: true,
2361                     addr: 0xfffc000,
2362                 },
2363                 Transfer {
2364                     write: true,
2365                     addr: 0xfffd000,
2366                 },
2367                 Transfer {
2368                     write: true,
2369                     addr: 0xfffe000,
2370                 },
2371                 Transfer {
2372                     write: true,
2373                     addr: 0xffff000,
2374                 },
2375             ];
2376 
2377             for xfer in &xfers {
2378                 if xfer.write {
2379                     write_all_at(&mut q, &b, xfer.addr).expect("Failed to write.");
2380                 } else {
2381                     read_exact_at(&mut q, &mut b, xfer.addr).expect("Failed to read.");
2382                 }
2383             }
2384         });
2385     }
2386 
2387     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2388     #[test]
2389     fn combo_write_read() {
2390         with_default_file(1024 * 1024 * 1024 * 256, |mut qcow_file| {
2391             const NUM_BLOCKS: usize = 55;
2392             const BLOCK_SIZE: usize = 0x1_0000;
2393             const OFFSET: u64 = 0x1_0000_0020;
2394             let data = [0x55u8; BLOCK_SIZE];
2395             let mut readback = [0u8; BLOCK_SIZE];
2396             for i in 0..NUM_BLOCKS {
2397                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2398                 write_all_at(&mut qcow_file, &data, seek_offset)
2399                     .expect("Failed to write test data.");
2400                 // Read back the data to check it was written correctly.
2401                 read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2402                 for (orig, read) in data.iter().zip(readback.iter()) {
2403                     assert_eq!(orig, read);
2404                 }
2405             }
2406             // Check that address 0 is still zeros.
2407             read_exact_at(&mut qcow_file, &mut readback, 0).expect("Failed to read.");
2408             for read in readback.iter() {
2409                 assert_eq!(*read, 0);
2410             }
2411             // Check the data again after the writes have happened.
2412             for i in 0..NUM_BLOCKS {
2413                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2414                 read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2415                 for (orig, read) in data.iter().zip(readback.iter()) {
2416                     assert_eq!(orig, read);
2417                 }
2418             }
2419 
2420             assert_eq!(qcow_file.first_zero_refcount().unwrap(), None);
2421         });
2422     }
2423 
2424     #[test]
2425     fn rebuild_refcounts() {
2426         with_basic_file(&valid_header(), |mut disk_file: File| {
2427             let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
2428             let cluster_size = 65536;
2429             let mut raw_file =
2430                 QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile.");
2431             QcowFile::rebuild_refcounts(&mut raw_file, header)
2432                 .expect("Failed to rebuild refcounts.");
2433         });
2434     }
2435 
2436     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2437     #[test]
2438     fn nested_qcow() {
2439         let tmp_dir = TempDir::new().unwrap();
2440 
2441         // A file `backing` is backing a qcow file `qcow.l1`, which in turn is backing another
2442         // qcow file.
2443         let backing_file_path = tmp_dir.path().join("backing");
2444         let _backing_file = OpenOptions::new()
2445             .read(true)
2446             .write(true)
2447             .create(true)
2448             .open(&backing_file_path)
2449             .unwrap();
2450 
2451         let level1_qcow_file_path = tmp_dir.path().join("qcow.l1");
2452         let level1_qcow_file = OpenOptions::new()
2453             .read(true)
2454             .write(true)
2455             .create(true)
2456             .open(&level1_qcow_file_path)
2457             .unwrap();
2458         let _level1_qcow_file = QcowFile::new_from_backing(
2459             level1_qcow_file,
2460             backing_file_path.to_str().unwrap(),
2461             1000, /* allow deep nesting */
2462         )
2463         .unwrap();
2464 
2465         let level2_qcow_file = tempfile().unwrap();
2466         let _level2_qcow_file = QcowFile::new_from_backing(
2467             level2_qcow_file,
2468             level1_qcow_file_path.to_str().unwrap(),
2469             1000, /* allow deep nesting */
2470         )
2471         .expect("failed to create level2 qcow file");
2472     }
2473 
2474     #[test]
2475     fn io_seek() {
2476         with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2477             // Cursor should start at 0.
2478             assert_eq!(qcow_file.seek(SeekFrom::Current(0)).unwrap(), 0);
2479 
2480             // Seek 1 MB from start.
2481             assert_eq!(
2482                 qcow_file.seek(SeekFrom::Start(1024 * 1024)).unwrap(),
2483                 1024 * 1024
2484             );
2485 
2486             // Rewind 1 MB + 1 byte (past beginning) - seeking to a negative offset is an error and
2487             // should not move the cursor.
2488             qcow_file
2489                 .seek(SeekFrom::Current(-(1024 * 1024 + 1)))
2490                 .expect_err("negative offset seek should fail");
2491             assert_eq!(qcow_file.seek(SeekFrom::Current(0)).unwrap(), 1024 * 1024);
2492 
2493             // Seek to last byte.
2494             assert_eq!(
2495                 qcow_file.seek(SeekFrom::End(-1)).unwrap(),
2496                 1024 * 1024 * 10 - 1
2497             );
2498 
2499             // Seek to EOF.
2500             assert_eq!(qcow_file.seek(SeekFrom::End(0)).unwrap(), 1024 * 1024 * 10);
2501 
2502             // Seek past EOF is not allowed.
2503             qcow_file
2504                 .seek(SeekFrom::End(1))
2505                 .expect_err("seek past EOF should fail");
2506         });
2507     }
2508 
2509     #[test]
2510     fn io_write_read() {
2511         with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2512             const BLOCK_SIZE: usize = 0x1_0000;
2513             let data_55 = [0x55u8; BLOCK_SIZE];
2514             let data_aa = [0xaau8; BLOCK_SIZE];
2515             let mut readback = [0u8; BLOCK_SIZE];
2516 
2517             qcow_file.write_all(&data_55).unwrap();
2518             assert_eq!(
2519                 qcow_file.seek(SeekFrom::Current(0)).unwrap(),
2520                 BLOCK_SIZE as u64
2521             );
2522 
2523             qcow_file.write_all(&data_aa).unwrap();
2524             assert_eq!(
2525                 qcow_file.seek(SeekFrom::Current(0)).unwrap(),
2526                 BLOCK_SIZE as u64 * 2
2527             );
2528 
2529             // Read BLOCK_SIZE of just 0xaa.
2530             assert_eq!(
2531                 qcow_file
2532                     .seek(SeekFrom::Current(-(BLOCK_SIZE as i64)))
2533                     .unwrap(),
2534                 BLOCK_SIZE as u64
2535             );
2536             qcow_file.read_exact(&mut readback).unwrap();
2537             assert_eq!(
2538                 qcow_file.seek(SeekFrom::Current(0)).unwrap(),
2539                 BLOCK_SIZE as u64 * 2
2540             );
2541             for (orig, read) in data_aa.iter().zip(readback.iter()) {
2542                 assert_eq!(orig, read);
2543             }
2544 
2545             // Read BLOCK_SIZE of just 0x55.
2546             qcow_file.rewind().unwrap();
2547             qcow_file.read_exact(&mut readback).unwrap();
2548             for (orig, read) in data_55.iter().zip(readback.iter()) {
2549                 assert_eq!(orig, read);
2550             }
2551 
2552             // Read BLOCK_SIZE crossing between the block of 0x55 and 0xaa.
2553             qcow_file
2554                 .seek(SeekFrom::Start(BLOCK_SIZE as u64 / 2))
2555                 .unwrap();
2556             qcow_file.read_exact(&mut readback).unwrap();
2557             for (orig, read) in data_55[BLOCK_SIZE / 2..]
2558                 .iter()
2559                 .chain(data_aa[..BLOCK_SIZE / 2].iter())
2560                 .zip(readback.iter())
2561             {
2562                 assert_eq!(orig, read);
2563             }
2564         });
2565     }
2566 }
2567