disk/qcow/
mod.rs

1// Copyright 2018 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod qcow_raw_file;
6mod refcount;
7mod vec_cache;
8
9use std::cmp::max;
10use std::cmp::min;
11use std::fs::File;
12use std::io;
13use std::io::Read;
14use std::io::Seek;
15use std::io::SeekFrom;
16use std::io::Write;
17use std::mem::size_of;
18use std::path::PathBuf;
19use std::str;
20
21use base::error;
22use base::AsRawDescriptor;
23use base::AsRawDescriptors;
24use base::FileAllocate;
25use base::FileReadWriteAtVolatile;
26use base::FileSetLen;
27use base::FileSync;
28use base::PunchHole;
29use base::RawDescriptor;
30use base::VolatileMemory;
31use base::VolatileSlice;
32use base::WriteZeroesAt;
33use cros_async::Executor;
34use libc::EINVAL;
35use libc::ENOSPC;
36use libc::ENOTSUP;
37use remain::sorted;
38use sync::Mutex;
39use thiserror::Error;
40
41use crate::asynchronous::DiskFlush;
42use crate::open_disk_file;
43use crate::qcow::qcow_raw_file::QcowRawFile;
44use crate::qcow::refcount::RefCount;
45use crate::qcow::vec_cache::CacheMap;
46use crate::qcow::vec_cache::Cacheable;
47use crate::qcow::vec_cache::VecCache;
48use crate::AsyncDisk;
49use crate::AsyncDiskFileWrapper;
50use crate::DiskFile;
51use crate::DiskFileParams;
52use crate::DiskGetLen;
53use crate::ToAsyncDisk;
54
55#[sorted]
56#[derive(Error, Debug)]
57pub enum Error {
58    #[error("backing file io error: {0}")]
59    BackingFileIo(io::Error),
60    #[error("backing file open error: {0}")]
61    BackingFileOpen(Box<crate::Error>),
62    #[error("backing file name is too long: {0} bytes over")]
63    BackingFileTooLong(usize),
64    #[error("compressed blocks not supported")]
65    CompressedBlocksNotSupported,
66    #[error("failed to evict cache: {0}")]
67    EvictingCache(io::Error),
68    #[error("file larger than max of {MAX_QCOW_FILE_SIZE}: {0}")]
69    FileTooBig(u64),
70    #[error("failed to get file size: {0}")]
71    GettingFileSize(io::Error),
72    #[error("failed to get refcount: {0}")]
73    GettingRefcount(refcount::Error),
74    #[error("failed to parse filename: {0}")]
75    InvalidBackingFileName(str::Utf8Error),
76    #[error("invalid cluster index")]
77    InvalidClusterIndex,
78    #[error("invalid cluster size")]
79    InvalidClusterSize,
80    #[error("invalid index")]
81    InvalidIndex,
82    #[error("invalid L1 table offset")]
83    InvalidL1TableOffset,
84    #[error("invalid L1 table size {0}")]
85    InvalidL1TableSize(u32),
86    #[error("invalid magic")]
87    InvalidMagic,
88    #[error("invalid offset")]
89    InvalidOffset(u64),
90    #[error("invalid refcount table offset")]
91    InvalidRefcountTableOffset,
92    #[error("invalid refcount table size: {0}")]
93    InvalidRefcountTableSize(u64),
94    #[error("no free clusters")]
95    NoFreeClusters,
96    #[error("no refcount clusters")]
97    NoRefcountClusters,
98    #[error("not enough space for refcounts")]
99    NotEnoughSpaceForRefcounts,
100    #[error("failed to open file: {0}")]
101    OpeningFile(io::Error),
102    #[error("failed to open file: {0}")]
103    ReadingHeader(io::Error),
104    #[error("failed to read pointers: {0}")]
105    ReadingPointers(io::Error),
106    #[error("failed to read ref count block: {0}")]
107    ReadingRefCountBlock(refcount::Error),
108    #[error("failed to read ref counts: {0}")]
109    ReadingRefCounts(io::Error),
110    #[error("failed to rebuild ref counts: {0}")]
111    RebuildingRefCounts(io::Error),
112    #[error("refcount table offset past file end")]
113    RefcountTableOffEnd,
114    #[error("too many clusters specified for refcount table")]
115    RefcountTableTooLarge,
116    #[error("failed to seek file: {0}")]
117    SeekingFile(io::Error),
118    #[error("failed to set refcount refcount: {0}")]
119    SettingRefcountRefcount(io::Error),
120    #[error("size too small for number of clusters")]
121    SizeTooSmallForNumberOfClusters,
122    #[error("l1 entry table too large: {0}")]
123    TooManyL1Entries(u64),
124    #[error("ref count table too large: {0}")]
125    TooManyRefcounts(u64),
126    #[error("unsupported refcount order")]
127    UnsupportedRefcountOrder,
128    #[error("unsupported version: {0}")]
129    UnsupportedVersion(u32),
130    #[error("failed to write header: {0}")]
131    WritingHeader(io::Error),
132}
133
134pub type Result<T> = std::result::Result<T, Error>;
135
136// Maximum data size supported.
137const MAX_QCOW_FILE_SIZE: u64 = 0x01 << 44; // 16 TB.
138
139// QCOW magic constant that starts the header.
140pub const QCOW_MAGIC: u32 = 0x5146_49fb;
141// Default to a cluster size of 2^DEFAULT_CLUSTER_BITS
142const DEFAULT_CLUSTER_BITS: u32 = 16;
143// Limit clusters to reasonable sizes. Choose the same limits as qemu. Making the clusters smaller
144// increases the amount of overhead for book keeping.
145const MIN_CLUSTER_BITS: u32 = 9;
146const MAX_CLUSTER_BITS: u32 = 21;
147// The L1 and RefCount table are kept in RAM, only handle files that require less than 35M entries.
148// This easily covers 1 TB files. When support for bigger files is needed the assumptions made to
149// keep these tables in RAM needs to be thrown out.
150const MAX_RAM_POINTER_TABLE_SIZE: u64 = 35_000_000;
151// Only support 2 byte refcounts, 2^refcount_order bits.
152const DEFAULT_REFCOUNT_ORDER: u32 = 4;
153
154const V3_BARE_HEADER_SIZE: u32 = 104;
155
156// bits 0-8 and 56-63 are reserved.
157const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
158const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
159// Flags
160const COMPRESSED_FLAG: u64 = 1 << 62;
161const CLUSTER_USED_FLAG: u64 = 1 << 63;
162const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0;
163
164// The format supports a "header extension area", that crosvm does not use.
165const QCOW_EMPTY_HEADER_EXTENSION_SIZE: u32 = 8;
166
167// Defined by the specification
168const MAX_BACKING_FILE_SIZE: u32 = 1023;
169
170/// Contains the information from the header of a qcow file.
171#[derive(Clone, Debug)]
172pub struct QcowHeader {
173    pub magic: u32,
174    pub version: u32,
175
176    pub backing_file_offset: u64,
177    pub backing_file_size: u32,
178
179    pub cluster_bits: u32,
180    pub size: u64,
181    pub crypt_method: u32,
182
183    pub l1_size: u32,
184    pub l1_table_offset: u64,
185
186    pub refcount_table_offset: u64,
187    pub refcount_table_clusters: u32,
188
189    pub nb_snapshots: u32,
190    pub snapshots_offset: u64,
191
192    // v3 entries
193    pub incompatible_features: u64,
194    pub compatible_features: u64,
195    pub autoclear_features: u64,
196    pub refcount_order: u32,
197    pub header_size: u32,
198
199    // Post-header entries
200    pub backing_file_path: Option<String>,
201}
202
203// Reads the next u16 from the file.
204fn read_u16_from_file(mut f: &File) -> Result<u16> {
205    let mut value = [0u8; 2];
206    (&mut f)
207        .read_exact(&mut value)
208        .map_err(Error::ReadingHeader)?;
209    Ok(u16::from_be_bytes(value))
210}
211
212// Reads the next u32 from the file.
213fn read_u32_from_file(mut f: &File) -> Result<u32> {
214    let mut value = [0u8; 4];
215    (&mut f)
216        .read_exact(&mut value)
217        .map_err(Error::ReadingHeader)?;
218    Ok(u32::from_be_bytes(value))
219}
220
221// Reads the next u64 from the file.
222fn read_u64_from_file(mut f: &File) -> Result<u64> {
223    let mut value = [0u8; 8];
224    (&mut f)
225        .read_exact(&mut value)
226        .map_err(Error::ReadingHeader)?;
227    Ok(u64::from_be_bytes(value))
228}
229
230impl QcowHeader {
231    /// Creates a QcowHeader from a reference to a file.
232    pub fn new(f: &mut File) -> Result<QcowHeader> {
233        f.seek(SeekFrom::Start(0)).map_err(Error::ReadingHeader)?;
234
235        let magic = read_u32_from_file(f)?;
236        if magic != QCOW_MAGIC {
237            return Err(Error::InvalidMagic);
238        }
239
240        let mut header = QcowHeader {
241            magic,
242            version: read_u32_from_file(f)?,
243            backing_file_offset: read_u64_from_file(f)?,
244            backing_file_size: read_u32_from_file(f)?,
245            cluster_bits: read_u32_from_file(f)?,
246            size: read_u64_from_file(f)?,
247            crypt_method: read_u32_from_file(f)?,
248            l1_size: read_u32_from_file(f)?,
249            l1_table_offset: read_u64_from_file(f)?,
250            refcount_table_offset: read_u64_from_file(f)?,
251            refcount_table_clusters: read_u32_from_file(f)?,
252            nb_snapshots: read_u32_from_file(f)?,
253            snapshots_offset: read_u64_from_file(f)?,
254            incompatible_features: read_u64_from_file(f)?,
255            compatible_features: read_u64_from_file(f)?,
256            autoclear_features: read_u64_from_file(f)?,
257            refcount_order: read_u32_from_file(f)?,
258            header_size: read_u32_from_file(f)?,
259            backing_file_path: None,
260        };
261        if header.backing_file_size > MAX_BACKING_FILE_SIZE {
262            return Err(Error::BackingFileTooLong(header.backing_file_size as usize));
263        }
264        if header.backing_file_offset != 0 {
265            f.seek(SeekFrom::Start(header.backing_file_offset))
266                .map_err(Error::ReadingHeader)?;
267            let mut backing_file_name_bytes = vec![0u8; header.backing_file_size as usize];
268            f.read_exact(&mut backing_file_name_bytes)
269                .map_err(Error::ReadingHeader)?;
270            header.backing_file_path = Some(
271                String::from_utf8(backing_file_name_bytes)
272                    .map_err(|err| Error::InvalidBackingFileName(err.utf8_error()))?,
273            );
274        }
275        Ok(header)
276    }
277
278    pub fn create_for_size_and_path(size: u64, backing_file: Option<&str>) -> Result<QcowHeader> {
279        let cluster_bits: u32 = DEFAULT_CLUSTER_BITS;
280        let cluster_size: u32 = 0x01 << cluster_bits;
281        let max_length: usize =
282            (cluster_size - V3_BARE_HEADER_SIZE - QCOW_EMPTY_HEADER_EXTENSION_SIZE) as usize;
283        if let Some(path) = backing_file {
284            if path.len() > max_length {
285                return Err(Error::BackingFileTooLong(path.len() - max_length));
286            }
287        }
288        // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses.
289        let l2_size: u32 = cluster_size / size_of::<u64>() as u32;
290        let num_clusters: u32 = size.div_ceil(u64::from(cluster_size)) as u32;
291        let num_l2_clusters: u32 = num_clusters.div_ceil(l2_size);
292        let l1_clusters: u32 = num_l2_clusters.div_ceil(cluster_size);
293        let header_clusters = (size_of::<QcowHeader>() as u32).div_ceil(cluster_size);
294        Ok(QcowHeader {
295            magic: QCOW_MAGIC,
296            version: 3,
297            backing_file_offset: (if backing_file.is_none() {
298                0
299            } else {
300                V3_BARE_HEADER_SIZE + QCOW_EMPTY_HEADER_EXTENSION_SIZE
301            }) as u64,
302            backing_file_size: backing_file.map_or(0, |x| x.len()) as u32,
303            cluster_bits: DEFAULT_CLUSTER_BITS,
304            size,
305            crypt_method: 0,
306            l1_size: num_l2_clusters,
307            l1_table_offset: u64::from(cluster_size),
308            // The refcount table is after l1 + header.
309            refcount_table_offset: u64::from(cluster_size * (l1_clusters + 1)),
310            refcount_table_clusters: {
311                // Pre-allocate enough clusters for the entire refcount table as it must be
312                // continuous in the file. Allocate enough space to refcount all clusters, including
313                // the refcount clusters.
314                let max_refcount_clusters = max_refcount_clusters(
315                    DEFAULT_REFCOUNT_ORDER,
316                    cluster_size,
317                    num_clusters + l1_clusters + num_l2_clusters + header_clusters,
318                ) as u32;
319                // The refcount table needs to store the offset of each refcount cluster.
320                (max_refcount_clusters * size_of::<u64>() as u32).div_ceil(cluster_size)
321            },
322            nb_snapshots: 0,
323            snapshots_offset: 0,
324            incompatible_features: 0,
325            compatible_features: 0,
326            autoclear_features: 0,
327            refcount_order: DEFAULT_REFCOUNT_ORDER,
328            header_size: V3_BARE_HEADER_SIZE,
329            backing_file_path: backing_file.map(String::from),
330        })
331    }
332
333    /// Write the header to `file`.
334    pub fn write_to<F: Write + Seek>(&self, file: &mut F) -> Result<()> {
335        // Writes the next u32 to the file.
336        fn write_u32_to_file<F: Write>(f: &mut F, value: u32) -> Result<()> {
337            f.write_all(&value.to_be_bytes())
338                .map_err(Error::WritingHeader)
339        }
340
341        // Writes the next u64 to the file.
342        fn write_u64_to_file<F: Write>(f: &mut F, value: u64) -> Result<()> {
343            f.write_all(&value.to_be_bytes())
344                .map_err(Error::WritingHeader)
345        }
346
347        write_u32_to_file(file, self.magic)?;
348        write_u32_to_file(file, self.version)?;
349        write_u64_to_file(file, self.backing_file_offset)?;
350        write_u32_to_file(file, self.backing_file_size)?;
351        write_u32_to_file(file, self.cluster_bits)?;
352        write_u64_to_file(file, self.size)?;
353        write_u32_to_file(file, self.crypt_method)?;
354        write_u32_to_file(file, self.l1_size)?;
355        write_u64_to_file(file, self.l1_table_offset)?;
356        write_u64_to_file(file, self.refcount_table_offset)?;
357        write_u32_to_file(file, self.refcount_table_clusters)?;
358        write_u32_to_file(file, self.nb_snapshots)?;
359        write_u64_to_file(file, self.snapshots_offset)?;
360        write_u64_to_file(file, self.incompatible_features)?;
361        write_u64_to_file(file, self.compatible_features)?;
362        write_u64_to_file(file, self.autoclear_features)?;
363        write_u32_to_file(file, self.refcount_order)?;
364        write_u32_to_file(file, self.header_size)?;
365        write_u32_to_file(file, 0)?; // header extension type: end of header extension area
366        write_u32_to_file(file, 0)?; // length of header extension data: 0
367        if let Some(backing_file_path) = self.backing_file_path.as_ref() {
368            write!(file, "{backing_file_path}").map_err(Error::WritingHeader)?;
369        }
370
371        // Set the file length by seeking and writing a zero to the last byte. This avoids needing
372        // a `File` instead of anything that implements seek as the `file` argument.
373        // Zeros out the l1 and refcount table clusters.
374        let cluster_size = 0x01u64 << self.cluster_bits;
375        let refcount_blocks_size = u64::from(self.refcount_table_clusters) * cluster_size;
376        file.seek(SeekFrom::Start(
377            self.refcount_table_offset + refcount_blocks_size - 2,
378        ))
379        .map_err(Error::WritingHeader)?;
380        file.write(&[0u8]).map_err(Error::WritingHeader)?;
381
382        Ok(())
383    }
384}
385
386fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> u64 {
387    // Use u64 as the product of the u32 inputs can overflow.
388    let refcount_bytes = (0x01 << refcount_order as u64) / 8;
389    let for_data = (u64::from(num_clusters) * refcount_bytes).div_ceil(u64::from(cluster_size));
390    let for_refcounts = (for_data * refcount_bytes).div_ceil(u64::from(cluster_size));
391    for_data + for_refcounts
392}
393
394/// Represents a qcow2 file. This is a sparse file format maintained by the qemu project.
395/// Full documentation of the format can be found in the qemu repository.
396///
397/// # Example
398///
399/// ```
400/// # use std::path::PathBuf;
401/// # use base::FileReadWriteAtVolatile;
402/// # use disk::QcowFile;
403/// # use disk::DiskFileParams;
404/// # use base::VolatileSlice;
405/// # fn test(file: std::fs::File, path: PathBuf) -> std::io::Result<()> {
406///     let mut q = QcowFile::from(file, DiskFileParams {
407///         path,
408///         is_read_only: false,
409///         is_sparse_file: false,
410///         is_overlapped: false,
411///         is_direct: false,
412///         lock: true,
413///         depth: 0,
414///     }).expect("Can't open qcow file");
415///     let mut buf = [0u8; 12];
416///     let mut vslice = VolatileSlice::new(&mut buf);
417///     q.read_at_volatile(vslice, 10)?;
418/// #   Ok(())
419/// # }
420/// ```
421#[derive(Debug)]
422pub struct QcowFile {
423    inner: Mutex<QcowFileInner>,
424    // Copy of `inner.header.size` outside the mutex.
425    virtual_size: u64,
426}
427
428#[derive(Debug)]
429struct QcowFileInner {
430    raw_file: QcowRawFile,
431    header: QcowHeader,
432    l1_table: VecCache<u64>,
433    l2_entries: u64,
434    l2_cache: CacheMap<VecCache<u64>>,
435    refcounts: RefCount,
436    current_offset: u64,
437    unref_clusters: Vec<u64>, // List of freshly unreferenced clusters.
438    // List of unreferenced clusters available to be used. unref clusters become available once the
439    // removal of references to them have been synced to disk.
440    avail_clusters: Vec<u64>,
441    backing_file: Option<Box<dyn DiskFile>>,
442}
443
444impl DiskFile for QcowFile {}
445
446impl DiskFlush for QcowFile {
447    fn flush(&self) -> io::Result<()> {
448        // Using fsync is overkill here, but, the code for flushing state to file tangled up with
449        // the fsync, so it is best we can do for now.
450        self.fsync()
451    }
452}
453
454impl QcowFile {
455    /// Creates a QcowFile from `file`. File must be a valid qcow2 image.
456    pub fn from(mut file: File, params: DiskFileParams) -> Result<QcowFile> {
457        let header = QcowHeader::new(&mut file)?;
458
459        // Only v3 files are supported.
460        if header.version != 3 {
461            return Err(Error::UnsupportedVersion(header.version));
462        }
463
464        // Make sure that the L1 table fits in RAM.
465        if u64::from(header.l1_size) > MAX_RAM_POINTER_TABLE_SIZE {
466            return Err(Error::InvalidL1TableSize(header.l1_size));
467        }
468
469        let cluster_bits: u32 = header.cluster_bits;
470        if !(MIN_CLUSTER_BITS..=MAX_CLUSTER_BITS).contains(&cluster_bits) {
471            return Err(Error::InvalidClusterSize);
472        }
473        let cluster_size = 0x01u64 << cluster_bits;
474
475        // Limit the total size of the disk.
476        if header.size > MAX_QCOW_FILE_SIZE {
477            return Err(Error::FileTooBig(header.size));
478        }
479
480        let backing_file = if let Some(backing_file_path) = header.backing_file_path.as_ref() {
481            let backing_file = open_disk_file(DiskFileParams {
482                path: PathBuf::from(backing_file_path),
483                // The backing file is only read from.
484                is_read_only: true,
485                // Sparse isn't meaningful for read only files.
486                is_sparse_file: false,
487                // TODO: Should pass `params.is_overlapped` through here. Needs testing.
488                is_overlapped: false,
489                is_direct: params.is_direct,
490                lock: params.lock,
491                depth: params.depth + 1,
492            })
493            .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
494            Some(backing_file)
495        } else {
496            None
497        };
498
499        // Only support two byte refcounts.
500        let refcount_bits: u64 = 0x01u64
501            .checked_shl(header.refcount_order)
502            .ok_or(Error::UnsupportedRefcountOrder)?;
503        if refcount_bits != 16 {
504            return Err(Error::UnsupportedRefcountOrder);
505        }
506        let refcount_bytes = refcount_bits.div_ceil(8);
507
508        // Need at least one refcount cluster
509        if header.refcount_table_clusters == 0 {
510            return Err(Error::NoRefcountClusters);
511        }
512        offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?;
513        offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?;
514        // refcount table must be a cluster boundary, and within the file's virtual or actual size.
515        offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?;
516        let file_size = file.metadata().map_err(Error::GettingFileSize)?.len();
517        if header.refcount_table_offset > max(file_size, header.size) {
518            return Err(Error::RefcountTableOffEnd);
519        }
520
521        // The first cluster should always have a non-zero refcount, so if it is 0,
522        // this is an old file with broken refcounts, which requires a rebuild.
523        let mut refcount_rebuild_required = true;
524        file.seek(SeekFrom::Start(header.refcount_table_offset))
525            .map_err(Error::SeekingFile)?;
526        let first_refblock_addr = read_u64_from_file(&file)?;
527        if first_refblock_addr != 0 {
528            file.seek(SeekFrom::Start(first_refblock_addr))
529                .map_err(Error::SeekingFile)?;
530            let first_cluster_refcount = read_u16_from_file(&file)?;
531            if first_cluster_refcount != 0 {
532                refcount_rebuild_required = false;
533            }
534        }
535
536        if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 {
537            refcount_rebuild_required = true;
538        }
539
540        let mut raw_file =
541            QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?;
542        if refcount_rebuild_required {
543            QcowFileInner::rebuild_refcounts(&mut raw_file, header.clone())?;
544        }
545
546        let l2_size = cluster_size / size_of::<u64>() as u64;
547        let num_clusters = header.size.div_ceil(cluster_size);
548        let num_l2_clusters = num_clusters.div_ceil(l2_size);
549        let l1_clusters = num_l2_clusters.div_ceil(cluster_size);
550        let header_clusters = (size_of::<QcowHeader>() as u64).div_ceil(cluster_size);
551        if num_l2_clusters > MAX_RAM_POINTER_TABLE_SIZE {
552            return Err(Error::TooManyL1Entries(num_l2_clusters));
553        }
554        let l1_table = VecCache::from_vec(
555            raw_file
556                .read_pointer_table(
557                    header.l1_table_offset,
558                    num_l2_clusters,
559                    Some(L1_TABLE_OFFSET_MASK),
560                )
561                .map_err(Error::ReadingHeader)?,
562        );
563
564        let num_clusters = header.size.div_ceil(cluster_size);
565        let refcount_clusters = max_refcount_clusters(
566            header.refcount_order,
567            cluster_size as u32,
568            (num_clusters + l1_clusters + num_l2_clusters + header_clusters) as u32,
569        );
570        // Check that the given header doesn't have a suspiciously sized refcount table.
571        if u64::from(header.refcount_table_clusters) > 2 * refcount_clusters {
572            return Err(Error::RefcountTableTooLarge);
573        }
574        if l1_clusters + refcount_clusters > MAX_RAM_POINTER_TABLE_SIZE {
575            return Err(Error::TooManyRefcounts(refcount_clusters));
576        }
577        let refcount_block_entries = cluster_size / refcount_bytes;
578        let refcounts = RefCount::new(
579            &mut raw_file,
580            header.refcount_table_offset,
581            refcount_clusters,
582            refcount_block_entries,
583            cluster_size,
584        )
585        .map_err(Error::ReadingRefCounts)?;
586
587        let l2_entries = cluster_size / size_of::<u64>() as u64;
588
589        let mut inner = QcowFileInner {
590            raw_file,
591            header,
592            l1_table,
593            l2_entries,
594            l2_cache: CacheMap::new(100),
595            refcounts,
596            current_offset: 0,
597            unref_clusters: Vec::new(),
598            avail_clusters: Vec::new(),
599            backing_file,
600        };
601
602        // Check that the L1 and refcount tables fit in a 64bit address space.
603        inner
604            .header
605            .l1_table_offset
606            .checked_add(inner.l1_address_offset(inner.virtual_size()))
607            .ok_or(Error::InvalidL1TableOffset)?;
608        inner
609            .header
610            .refcount_table_offset
611            .checked_add(u64::from(inner.header.refcount_table_clusters) * cluster_size)
612            .ok_or(Error::InvalidRefcountTableOffset)?;
613
614        inner.find_avail_clusters()?;
615
616        let virtual_size = inner.virtual_size();
617        Ok(QcowFile {
618            inner: Mutex::new(inner),
619            virtual_size,
620        })
621    }
622
623    /// Creates a new QcowFile at the given path.
624    pub fn new(file: File, params: DiskFileParams, virtual_size: u64) -> Result<QcowFile> {
625        let header = QcowHeader::create_for_size_and_path(virtual_size, None)?;
626        QcowFile::new_from_header(file, params, header)
627    }
628
629    /// Creates a new QcowFile at the given path.
630    pub fn new_from_backing(
631        file: File,
632        params: DiskFileParams,
633        backing_file_name: &str,
634    ) -> Result<QcowFile> {
635        // Open the backing file as a `DiskFile` to determine its size (which may not match the
636        // filesystem size).
637        let size = {
638            let backing_file = open_disk_file(DiskFileParams {
639                path: PathBuf::from(backing_file_name),
640                // The backing file is only read from.
641                is_read_only: true,
642                // Sparse isn't meaningful for read only files.
643                is_sparse_file: false,
644                // TODO: Should pass `params.is_overlapped` through here. Needs testing.
645                is_overlapped: false,
646                is_direct: params.is_direct,
647                lock: params.lock,
648                depth: params.depth + 1,
649            })
650            .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
651            backing_file.get_len().map_err(Error::BackingFileIo)?
652        };
653        let header = QcowHeader::create_for_size_and_path(size, Some(backing_file_name))?;
654        QcowFile::new_from_header(file, params, header)
655    }
656
657    fn new_from_header(
658        mut file: File,
659        params: DiskFileParams,
660        header: QcowHeader,
661    ) -> Result<QcowFile> {
662        file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?;
663        header.write_to(&mut file)?;
664
665        let mut qcow = Self::from(file, params)?;
666        let inner = qcow.inner.get_mut();
667
668        // Set the refcount for each refcount table cluster.
669        let cluster_size = 0x01u64 << inner.header.cluster_bits;
670        let refcount_table_base = inner.header.refcount_table_offset;
671        let end_cluster_addr =
672            refcount_table_base + u64::from(inner.header.refcount_table_clusters) * cluster_size;
673
674        let mut cluster_addr = 0;
675        while cluster_addr < end_cluster_addr {
676            let mut unref_clusters = inner
677                .set_cluster_refcount(cluster_addr, 1)
678                .map_err(Error::SettingRefcountRefcount)?;
679            inner.unref_clusters.append(&mut unref_clusters);
680            cluster_addr += cluster_size;
681        }
682
683        Ok(qcow)
684    }
685
686    pub fn set_backing_file(&mut self, backing: Option<Box<dyn DiskFile>>) {
687        self.inner.get_mut().backing_file = backing;
688    }
689}
690
691impl QcowFileInner {
692    /// Returns the first cluster in the file with a 0 refcount. Used for testing.
693    #[cfg(test)]
694    fn first_zero_refcount(&mut self) -> Result<Option<u64>> {
695        let file_size = self
696            .raw_file
697            .file_mut()
698            .metadata()
699            .map_err(Error::GettingFileSize)?
700            .len();
701        let cluster_size = 0x01u64 << self.header.cluster_bits;
702
703        let mut cluster_addr = 0;
704        while cluster_addr < file_size {
705            let cluster_refcount = self
706                .refcounts
707                .get_cluster_refcount(&mut self.raw_file, cluster_addr)
708                .map_err(Error::GettingRefcount)?;
709            if cluster_refcount == 0 {
710                return Ok(Some(cluster_addr));
711            }
712            cluster_addr += cluster_size;
713        }
714        Ok(None)
715    }
716
717    fn find_avail_clusters(&mut self) -> Result<()> {
718        let cluster_size = self.raw_file.cluster_size();
719
720        let file_size = self
721            .raw_file
722            .file_mut()
723            .metadata()
724            .map_err(Error::GettingFileSize)?
725            .len();
726
727        for i in (0..file_size).step_by(cluster_size as usize) {
728            let refcount = self
729                .refcounts
730                .get_cluster_refcount(&mut self.raw_file, i)
731                .map_err(Error::GettingRefcount)?;
732            if refcount == 0 {
733                self.avail_clusters.push(i);
734            }
735        }
736
737        Ok(())
738    }
739
740    /// Rebuild the reference count tables.
741    fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> {
742        fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> {
743            let idx = (cluster_address / cluster_size) as usize;
744            if idx >= refcounts.len() {
745                return Err(Error::InvalidClusterIndex);
746            }
747            refcounts[idx] += 1;
748            Ok(())
749        }
750
751        // Add a reference to the first cluster (header plus extensions).
752        fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> {
753            add_ref(refcounts, cluster_size, 0)
754        }
755
756        // Add references to the L1 table clusters.
757        fn set_l1_refcounts(
758            refcounts: &mut [u16],
759            header: QcowHeader,
760            cluster_size: u64,
761        ) -> Result<()> {
762            let l1_clusters = u64::from(header.l1_size).div_ceil(cluster_size);
763            let l1_table_offset = header.l1_table_offset;
764            for i in 0..l1_clusters {
765                add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?;
766            }
767            Ok(())
768        }
769
770        // Traverse the L1 and L2 tables to find all reachable data clusters.
771        fn set_data_refcounts(
772            refcounts: &mut [u16],
773            header: QcowHeader,
774            cluster_size: u64,
775            raw_file: &mut QcowRawFile,
776        ) -> Result<()> {
777            let l1_table = raw_file
778                .read_pointer_table(
779                    header.l1_table_offset,
780                    header.l1_size as u64,
781                    Some(L1_TABLE_OFFSET_MASK),
782                )
783                .map_err(Error::ReadingPointers)?;
784            for l1_index in 0..header.l1_size as usize {
785                let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
786                if l2_addr_disk != 0 {
787                    // Add a reference to the L2 table cluster itself.
788                    add_ref(refcounts, cluster_size, l2_addr_disk)?;
789
790                    // Read the L2 table and find all referenced data clusters.
791                    let l2_table = raw_file
792                        .read_pointer_table(
793                            l2_addr_disk,
794                            cluster_size / size_of::<u64>() as u64,
795                            Some(L2_TABLE_OFFSET_MASK),
796                        )
797                        .map_err(Error::ReadingPointers)?;
798                    for data_cluster_addr in l2_table {
799                        if data_cluster_addr != 0 {
800                            add_ref(refcounts, cluster_size, data_cluster_addr)?;
801                        }
802                    }
803                }
804            }
805
806            Ok(())
807        }
808
809        // Add references to the top-level refcount table clusters.
810        fn set_refcount_table_refcounts(
811            refcounts: &mut [u16],
812            header: QcowHeader,
813            cluster_size: u64,
814        ) -> Result<()> {
815            let refcount_table_offset = header.refcount_table_offset;
816            for i in 0..header.refcount_table_clusters as u64 {
817                add_ref(
818                    refcounts,
819                    cluster_size,
820                    refcount_table_offset + i * cluster_size,
821                )?;
822            }
823            Ok(())
824        }
825
826        // Allocate clusters for refblocks.
827        // This needs to be done last so that we have the correct refcounts for all other
828        // clusters.
829        fn alloc_refblocks(
830            refcounts: &mut [u16],
831            cluster_size: u64,
832            refblock_clusters: u64,
833            pointers_per_cluster: u64,
834        ) -> Result<Vec<u64>> {
835            let refcount_table_entries = refblock_clusters.div_ceil(pointers_per_cluster);
836            let mut ref_table = vec![0; refcount_table_entries as usize];
837            let mut first_free_cluster: u64 = 0;
838            for refblock_addr in &mut ref_table {
839                loop {
840                    if first_free_cluster >= refcounts.len() as u64 {
841                        return Err(Error::NotEnoughSpaceForRefcounts);
842                    }
843                    if refcounts[first_free_cluster as usize] == 0 {
844                        break;
845                    }
846                    first_free_cluster += 1;
847                }
848
849                *refblock_addr = first_free_cluster * cluster_size;
850                add_ref(refcounts, cluster_size, *refblock_addr)?;
851
852                first_free_cluster += 1;
853            }
854
855            Ok(ref_table)
856        }
857
858        // Write the updated reference count blocks and reftable.
859        fn write_refblocks(
860            refcounts: &[u16],
861            mut header: QcowHeader,
862            ref_table: &[u64],
863            raw_file: &mut QcowRawFile,
864            refcount_block_entries: u64,
865        ) -> Result<()> {
866            // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables.
867            header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
868            raw_file
869                .file_mut()
870                .seek(SeekFrom::Start(0))
871                .map_err(Error::SeekingFile)?;
872            header.write_to(raw_file.file_mut())?;
873
874            for (i, refblock_addr) in ref_table.iter().enumerate() {
875                // Write a block of refcounts to the location indicated by refblock_addr.
876                let refblock_start = i * (refcount_block_entries as usize);
877                let refblock_end = min(
878                    refcounts.len(),
879                    refblock_start + refcount_block_entries as usize,
880                );
881                let refblock = &refcounts[refblock_start..refblock_end];
882                raw_file
883                    .write_refcount_block(*refblock_addr, refblock)
884                    .map_err(Error::WritingHeader)?;
885
886                // If this is the last (partial) cluster, pad it out to a full refblock cluster.
887                if refblock.len() < refcount_block_entries as usize {
888                    let refblock_padding =
889                        vec![0u16; refcount_block_entries as usize - refblock.len()];
890                    raw_file
891                        .write_refcount_block(
892                            *refblock_addr + refblock.len() as u64 * 2,
893                            &refblock_padding,
894                        )
895                        .map_err(Error::WritingHeader)?;
896                }
897            }
898
899            // Rewrite the top-level refcount table.
900            raw_file
901                .write_pointer_table(header.refcount_table_offset, ref_table, 0)
902                .map_err(Error::WritingHeader)?;
903
904            // Rewrite the header again, now with lazy refcounts disabled.
905            header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
906            raw_file
907                .file_mut()
908                .seek(SeekFrom::Start(0))
909                .map_err(Error::SeekingFile)?;
910            header.write_to(raw_file.file_mut())?;
911
912            Ok(())
913        }
914
915        let cluster_size = raw_file.cluster_size();
916
917        let file_size = raw_file
918            .file_mut()
919            .metadata()
920            .map_err(Error::GettingFileSize)?
921            .len();
922
923        let refcount_bits = 1u64 << header.refcount_order;
924        let refcount_bytes = refcount_bits.div_ceil(8);
925        let refcount_block_entries = cluster_size / refcount_bytes;
926        let pointers_per_cluster = cluster_size / size_of::<u64>() as u64;
927        let data_clusters = header.size.div_ceil(cluster_size);
928        let l2_clusters = data_clusters.div_ceil(pointers_per_cluster);
929        let l1_clusters = l2_clusters.div_ceil(cluster_size);
930        let header_clusters = (size_of::<QcowHeader>() as u64).div_ceil(cluster_size);
931        let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters;
932        let mut max_valid_cluster_index = max_clusters;
933        let refblock_clusters = max_valid_cluster_index.div_ceil(refcount_block_entries);
934        let reftable_clusters = refblock_clusters.div_ceil(pointers_per_cluster);
935        // Account for refblocks and the ref table size needed to address them.
936        let refblocks_for_refs =
937            (refblock_clusters + reftable_clusters).div_ceil(refcount_block_entries);
938        let reftable_clusters_for_refs = refblocks_for_refs.div_ceil(refcount_block_entries);
939        max_valid_cluster_index += refblock_clusters + reftable_clusters;
940        max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs;
941
942        if max_valid_cluster_index > MAX_RAM_POINTER_TABLE_SIZE {
943            return Err(Error::InvalidRefcountTableSize(max_valid_cluster_index));
944        }
945
946        let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
947        if max_valid_cluster_offset < file_size - cluster_size {
948            return Err(Error::InvalidRefcountTableSize(max_valid_cluster_offset));
949        }
950
951        let mut refcounts = vec![0; max_valid_cluster_index as usize];
952
953        // Find all references clusters and rebuild refcounts.
954        set_header_refcount(&mut refcounts, cluster_size)?;
955        set_l1_refcounts(&mut refcounts, header.clone(), cluster_size)?;
956        set_data_refcounts(&mut refcounts, header.clone(), cluster_size, raw_file)?;
957        set_refcount_table_refcounts(&mut refcounts, header.clone(), cluster_size)?;
958
959        // Allocate clusters to store the new reference count blocks.
960        let ref_table = alloc_refblocks(
961            &mut refcounts,
962            cluster_size,
963            refblock_clusters,
964            pointers_per_cluster,
965        )?;
966
967        // Write updated reference counts and point the reftable at them.
968        write_refblocks(
969            &refcounts,
970            header,
971            &ref_table,
972            raw_file,
973            refcount_block_entries,
974        )
975    }
976
977    // Limits the range so that it doesn't exceed the virtual size of the file.
978    fn limit_range_file(&self, address: u64, count: usize) -> usize {
979        if address.checked_add(count as u64).is_none() || address > self.virtual_size() {
980            return 0;
981        }
982        min(count as u64, self.virtual_size() - address) as usize
983    }
984
985    // Limits the range so that it doesn't overflow the end of a cluster.
986    fn limit_range_cluster(&self, address: u64, count: usize) -> usize {
987        let offset: u64 = self.raw_file.cluster_offset(address);
988        let limit = self.raw_file.cluster_size() - offset;
989        min(count as u64, limit) as usize
990    }
991
992    // Gets the maximum virtual size of this image.
993    fn virtual_size(&self) -> u64 {
994        self.header.size
995    }
996
997    // Gets the offset of `address` in the L1 table.
998    fn l1_address_offset(&self, address: u64) -> u64 {
999        let l1_index = self.l1_table_index(address);
1000        l1_index * size_of::<u64>() as u64
1001    }
1002
1003    // Gets the offset of `address` in the L1 table.
1004    fn l1_table_index(&self, address: u64) -> u64 {
1005        (address / self.raw_file.cluster_size()) / self.l2_entries
1006    }
1007
1008    // Gets the offset of `address` in the L2 table.
1009    fn l2_table_index(&self, address: u64) -> u64 {
1010        (address / self.raw_file.cluster_size()) % self.l2_entries
1011    }
1012
1013    // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters have
1014    // yet to be allocated, return None.
1015    fn file_offset_read(&mut self, address: u64) -> std::io::Result<Option<u64>> {
1016        if address >= self.virtual_size() {
1017            return Err(std::io::Error::from_raw_os_error(EINVAL));
1018        }
1019
1020        let l1_index = self.l1_table_index(address) as usize;
1021        let l2_addr_disk = *self
1022            .l1_table
1023            .get(l1_index)
1024            .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1025
1026        if l2_addr_disk == 0 {
1027            // Reading from an unallocated cluster will return zeros.
1028            return Ok(None);
1029        }
1030
1031        let l2_index = self.l2_table_index(address) as usize;
1032
1033        if !self.l2_cache.contains_key(&l1_index) {
1034            // Not in the cache.
1035            let table =
1036                VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1037
1038            let l1_table = &self.l1_table;
1039            let raw_file = &mut self.raw_file;
1040            self.l2_cache.insert(l1_index, table, |index, evicted| {
1041                raw_file.write_pointer_table(
1042                    l1_table[index],
1043                    evicted.get_values(),
1044                    CLUSTER_USED_FLAG,
1045                )
1046            })?;
1047        };
1048
1049        let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1050        if cluster_addr == 0 {
1051            return Ok(None);
1052        }
1053        Ok(Some(cluster_addr + self.raw_file.cluster_offset(address)))
1054    }
1055
1056    // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need
1057    // to be allocated, they will be.
1058    fn file_offset_write(&mut self, address: u64) -> std::io::Result<u64> {
1059        if address >= self.virtual_size() {
1060            return Err(std::io::Error::from_raw_os_error(EINVAL));
1061        }
1062
1063        let l1_index = self.l1_table_index(address) as usize;
1064        let l2_addr_disk = *self
1065            .l1_table
1066            .get(l1_index)
1067            .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1068        let l2_index = self.l2_table_index(address) as usize;
1069
1070        let mut set_refcounts = Vec::new();
1071
1072        if !self.l2_cache.contains_key(&l1_index) {
1073            // Not in the cache.
1074            let l2_table = if l2_addr_disk == 0 {
1075                // Allocate a new cluster to store the L2 table and update the L1 table to point
1076                // to the new table.
1077                let new_addr: u64 = self.get_new_cluster(None)?;
1078                // The cluster refcount starts at one meaning it is used but doesn't need COW.
1079                set_refcounts.push((new_addr, 1));
1080                self.l1_table[l1_index] = new_addr;
1081                VecCache::new(self.l2_entries as usize)
1082            } else {
1083                VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?)
1084            };
1085            let l1_table = &self.l1_table;
1086            let raw_file = &mut self.raw_file;
1087            self.l2_cache.insert(l1_index, l2_table, |index, evicted| {
1088                raw_file.write_pointer_table(
1089                    l1_table[index],
1090                    evicted.get_values(),
1091                    CLUSTER_USED_FLAG,
1092                )
1093            })?;
1094        }
1095
1096        let cluster_addr = match self.l2_cache.get(&l1_index).unwrap()[l2_index] {
1097            0 => {
1098                let initial_data = if let Some(backing) = self.backing_file.as_mut() {
1099                    let cluster_size = self.raw_file.cluster_size();
1100                    let cluster_begin = address - (address % cluster_size);
1101                    let mut cluster_data = vec![0u8; cluster_size as usize];
1102                    let volatile_slice = VolatileSlice::new(&mut cluster_data);
1103                    backing.read_exact_at_volatile(volatile_slice, cluster_begin)?;
1104                    Some(cluster_data)
1105                } else {
1106                    None
1107                };
1108                // Need to allocate a data cluster
1109                let cluster_addr = self.append_data_cluster(initial_data)?;
1110                self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?;
1111                cluster_addr
1112            }
1113            a => a,
1114        };
1115
1116        for (addr, count) in set_refcounts {
1117            let mut newly_unref = self.set_cluster_refcount(addr, count)?;
1118            self.unref_clusters.append(&mut newly_unref);
1119        }
1120
1121        Ok(cluster_addr + self.raw_file.cluster_offset(address))
1122    }
1123
1124    // Updates the l1 and l2 tables to point to the new `cluster_addr`.
1125    fn update_cluster_addr(
1126        &mut self,
1127        l1_index: usize,
1128        l2_index: usize,
1129        cluster_addr: u64,
1130        set_refcounts: &mut Vec<(u64, u16)>,
1131    ) -> io::Result<()> {
1132        if !self.l2_cache.get(&l1_index).unwrap().dirty() {
1133            // Free the previously used cluster if one exists. Modified tables are always
1134            // witten to new clusters so the L1 table can be committed to disk after they
1135            // are and L1 never points at an invalid table.
1136            // The index must be valid from when it was insterted.
1137            let addr = self.l1_table[l1_index];
1138            if addr != 0 {
1139                self.unref_clusters.push(addr);
1140                set_refcounts.push((addr, 0));
1141            }
1142
1143            // Allocate a new cluster to store the L2 table and update the L1 table to point
1144            // to the new table. The cluster will be written when the cache is flushed, no
1145            // need to copy the data now.
1146            let new_addr: u64 = self.get_new_cluster(None)?;
1147            // The cluster refcount starts at one indicating it is used but doesn't need
1148            // COW.
1149            set_refcounts.push((new_addr, 1));
1150            self.l1_table[l1_index] = new_addr;
1151        }
1152        // 'unwrap' is OK because it was just added.
1153        self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = cluster_addr;
1154        Ok(())
1155    }
1156
1157    // Allocate a new cluster and return its offset within the raw file.
1158    fn get_new_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1159        // First use a pre allocated cluster if one is available.
1160        if let Some(free_cluster) = self.avail_clusters.pop() {
1161            if let Some(initial_data) = initial_data {
1162                self.raw_file.write_cluster(free_cluster, initial_data)?;
1163            } else {
1164                self.raw_file.zero_cluster(free_cluster)?;
1165            }
1166            return Ok(free_cluster);
1167        }
1168
1169        let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset();
1170        if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? {
1171            if let Some(initial_data) = initial_data {
1172                self.raw_file.write_cluster(new_cluster, initial_data)?;
1173            }
1174            Ok(new_cluster)
1175        } else {
1176            error!("No free clusters in get_new_cluster()");
1177            Err(std::io::Error::from_raw_os_error(ENOSPC))
1178        }
1179    }
1180
1181    // Allocate and initialize a new data cluster. Returns the offset of the
1182    // cluster in to the file on success.
1183    fn append_data_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1184        let new_addr: u64 = self.get_new_cluster(initial_data)?;
1185        // The cluster refcount starts at one indicating it is used but doesn't need COW.
1186        let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?;
1187        self.unref_clusters.append(&mut newly_unref);
1188        Ok(new_addr)
1189    }
1190
1191    // Deallocate the storage for the cluster starting at `address`.
1192    // Any future reads of this cluster will return all zeroes (or the backing file, if in use).
1193    fn deallocate_cluster(&mut self, address: u64) -> std::io::Result<()> {
1194        if address >= self.virtual_size() {
1195            return Err(std::io::Error::from_raw_os_error(EINVAL));
1196        }
1197
1198        let l1_index = self.l1_table_index(address) as usize;
1199        let l2_addr_disk = *self
1200            .l1_table
1201            .get(l1_index)
1202            .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1203        let l2_index = self.l2_table_index(address) as usize;
1204
1205        if l2_addr_disk == 0 {
1206            // The whole L2 table for this address is not allocated yet,
1207            // so the cluster must also be unallocated.
1208            return Ok(());
1209        }
1210
1211        if !self.l2_cache.contains_key(&l1_index) {
1212            // Not in the cache.
1213            let table =
1214                VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1215            let l1_table = &self.l1_table;
1216            let raw_file = &mut self.raw_file;
1217            self.l2_cache.insert(l1_index, table, |index, evicted| {
1218                raw_file.write_pointer_table(
1219                    l1_table[index],
1220                    evicted.get_values(),
1221                    CLUSTER_USED_FLAG,
1222                )
1223            })?;
1224        }
1225
1226        let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1227        if cluster_addr == 0 {
1228            // This cluster is already unallocated; nothing to do.
1229            return Ok(());
1230        }
1231
1232        // Decrement the refcount.
1233        let refcount = self
1234            .refcounts
1235            .get_cluster_refcount(&mut self.raw_file, cluster_addr)
1236            .map_err(|_| std::io::Error::from_raw_os_error(EINVAL))?;
1237        if refcount == 0 {
1238            return Err(std::io::Error::from_raw_os_error(EINVAL));
1239        }
1240
1241        let new_refcount = refcount - 1;
1242        let mut newly_unref = self.set_cluster_refcount(cluster_addr, new_refcount)?;
1243        self.unref_clusters.append(&mut newly_unref);
1244
1245        // Rewrite the L2 entry to remove the cluster mapping.
1246        // unwrap is safe as we just checked/inserted this entry.
1247        self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = 0;
1248
1249        if new_refcount == 0 {
1250            let cluster_size = self.raw_file.cluster_size();
1251            // This cluster is no longer in use; deallocate the storage.
1252            // The underlying FS may not support FALLOC_FL_PUNCH_HOLE,
1253            // so don't treat an error as fatal.  Future reads will return zeros anyways.
1254            let _ = self.raw_file.file().punch_hole(cluster_addr, cluster_size);
1255            self.unref_clusters.push(cluster_addr);
1256        }
1257        Ok(())
1258    }
1259
1260    // Fill a range of `length` bytes starting at `address` with zeroes.
1261    // Any future reads of this range will return all zeroes.
1262    // If there is no backing file, this will deallocate cluster storage when possible.
1263    fn zero_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()> {
1264        let write_count: usize = self.limit_range_file(address, length);
1265
1266        let mut nwritten: usize = 0;
1267        while nwritten < write_count {
1268            let curr_addr = address + nwritten as u64;
1269            let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1270
1271            if self.backing_file.is_none() && count == self.raw_file.cluster_size() as usize {
1272                // Full cluster and no backing file in use - deallocate the storage.
1273                self.deallocate_cluster(curr_addr)?;
1274            } else {
1275                // Partial cluster - zero out the relevant bytes.
1276                let offset = if self.backing_file.is_some() {
1277                    // There is a backing file, so we need to allocate a cluster in order to
1278                    // zero out the hole-punched bytes such that the backing file contents do not
1279                    // show through.
1280                    Some(self.file_offset_write(curr_addr)?)
1281                } else {
1282                    // Any space in unallocated clusters can be left alone, since
1283                    // unallocated clusters already read back as zeroes.
1284                    self.file_offset_read(curr_addr)?
1285                };
1286                if let Some(offset) = offset {
1287                    // Partial cluster - zero it out.
1288                    self.raw_file.file().write_zeroes_all_at(offset, count)?;
1289                }
1290            }
1291
1292            nwritten += count;
1293        }
1294        Ok(())
1295    }
1296
1297    // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any
1298    // cluster is compressed.
1299    fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result<Vec<u64>> {
1300        let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?;
1301        if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) {
1302            return Err(std::io::Error::from_raw_os_error(ENOTSUP));
1303        }
1304        Ok(file_values
1305            .iter()
1306            .map(|entry| *entry & L2_TABLE_OFFSET_MASK)
1307            .collect())
1308    }
1309
1310    // Set the refcount for a cluster with the given address.
1311    // Returns a list of any refblocks that can be reused, this happens when a refblock is moved,
1312    // the old location can be reused.
1313    fn set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result<Vec<u64>> {
1314        let mut added_clusters = Vec::new();
1315        let mut unref_clusters = Vec::new();
1316        let mut refcount_set = false;
1317        let mut new_cluster = None;
1318
1319        while !refcount_set {
1320            match self.refcounts.set_cluster_refcount(
1321                &mut self.raw_file,
1322                address,
1323                refcount,
1324                new_cluster.take(),
1325            ) {
1326                Ok(None) => {
1327                    refcount_set = true;
1328                }
1329                Ok(Some(freed_cluster)) => {
1330                    unref_clusters.push(freed_cluster);
1331                    refcount_set = true;
1332                }
1333                Err(refcount::Error::EvictingRefCounts(e)) => {
1334                    return Err(e);
1335                }
1336                Err(refcount::Error::InvalidIndex) => {
1337                    return Err(std::io::Error::from_raw_os_error(EINVAL));
1338                }
1339                Err(refcount::Error::NeedCluster(addr)) => {
1340                    // Read the address and call set_cluster_refcount again.
1341                    new_cluster = Some((
1342                        addr,
1343                        VecCache::from_vec(self.raw_file.read_refcount_block(addr)?),
1344                    ));
1345                }
1346                Err(refcount::Error::NeedNewCluster) => {
1347                    // Allocate the cluster and call set_cluster_refcount again.
1348                    let addr = self.get_new_cluster(None)?;
1349                    added_clusters.push(addr);
1350                    new_cluster = Some((
1351                        addr,
1352                        VecCache::new(self.refcounts.refcounts_per_block() as usize),
1353                    ));
1354                }
1355                Err(refcount::Error::ReadingRefCounts(e)) => {
1356                    return Err(e);
1357                }
1358            }
1359        }
1360
1361        for addr in added_clusters {
1362            self.set_cluster_refcount(addr, 1)?;
1363        }
1364        Ok(unref_clusters)
1365    }
1366
1367    fn sync_caches(&mut self) -> std::io::Result<()> {
1368        // Write out all dirty L2 tables.
1369        for (l1_index, l2_table) in self.l2_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
1370            // The index must be valid from when we insterted it.
1371            let addr = self.l1_table[*l1_index];
1372            if addr != 0 {
1373                self.raw_file.write_pointer_table(
1374                    addr,
1375                    l2_table.get_values(),
1376                    CLUSTER_USED_FLAG,
1377                )?;
1378            } else {
1379                return Err(std::io::Error::from_raw_os_error(EINVAL));
1380            }
1381            l2_table.mark_clean();
1382        }
1383        // Write the modified refcount blocks.
1384        self.refcounts.flush_blocks(&mut self.raw_file)?;
1385        // Make sure metadata(file len) and all data clusters are written.
1386        self.raw_file.file_mut().sync_all()?;
1387
1388        // Push L1 table and refcount table last as all the clusters they point to are now
1389        // guaranteed to be valid.
1390        let mut sync_required = false;
1391        if self.l1_table.dirty() {
1392            self.raw_file.write_pointer_table(
1393                self.header.l1_table_offset,
1394                self.l1_table.get_values(),
1395                0,
1396            )?;
1397            self.l1_table.mark_clean();
1398            sync_required = true;
1399        }
1400        sync_required |= self.refcounts.flush_table(&mut self.raw_file)?;
1401        if sync_required {
1402            self.raw_file.file_mut().sync_data()?;
1403        }
1404        Ok(())
1405    }
1406
1407    // Reads `count` bytes starting at `address`, calling `cb` repeatedly with the data source,
1408    // number of bytes read so far, offset to read from, and number of bytes to read from the file
1409    // in that invocation. If None is given to `cb` in place of the backing file, the `cb` should
1410    // infer zeros would have been read.
1411    fn read_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1412    where
1413        F: FnMut(Option<&mut dyn DiskFile>, usize, u64, usize) -> std::io::Result<()>,
1414    {
1415        let read_count: usize = self.limit_range_file(address, count);
1416
1417        let mut nread: usize = 0;
1418        while nread < read_count {
1419            let curr_addr = address + nread as u64;
1420            let file_offset = self.file_offset_read(curr_addr)?;
1421            let count = self.limit_range_cluster(curr_addr, read_count - nread);
1422
1423            if let Some(offset) = file_offset {
1424                cb(Some(self.raw_file.file_mut()), nread, offset, count)?;
1425            } else if let Some(backing) = self.backing_file.as_mut() {
1426                cb(Some(backing.as_mut()), nread, curr_addr, count)?;
1427            } else {
1428                cb(None, nread, 0, count)?;
1429            }
1430
1431            nread += count;
1432        }
1433        Ok(read_count)
1434    }
1435
1436    // Writes `count` bytes starting at `address`, calling `cb` repeatedly with the backing file,
1437    // number of bytes written so far, raw file offset, and number of bytes to write to the file in
1438    // that invocation.
1439    fn write_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1440    where
1441        F: FnMut(&mut File, usize, u64, usize) -> std::io::Result<()>,
1442    {
1443        let write_count: usize = self.limit_range_file(address, count);
1444
1445        let mut nwritten: usize = 0;
1446        while nwritten < write_count {
1447            let curr_addr = address + nwritten as u64;
1448            let offset = self.file_offset_write(curr_addr)?;
1449            let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1450
1451            cb(self.raw_file.file_mut(), nwritten, offset, count)?;
1452
1453            nwritten += count;
1454        }
1455        Ok(write_count)
1456    }
1457}
1458
1459impl Drop for QcowFile {
1460    fn drop(&mut self) {
1461        let _ = self.inner.get_mut().sync_caches();
1462    }
1463}
1464
1465impl AsRawDescriptors for QcowFile {
1466    fn as_raw_descriptors(&self) -> Vec<RawDescriptor> {
1467        // Taking a lock here feels wrong, but this method is generally only used during
1468        // sandboxing, so it should be OK.
1469        let inner = self.inner.lock();
1470        let mut descriptors = vec![inner.raw_file.file().as_raw_descriptor()];
1471        if let Some(backing) = &inner.backing_file {
1472            descriptors.append(&mut backing.as_raw_descriptors());
1473        }
1474        descriptors
1475    }
1476}
1477
1478impl Read for QcowFile {
1479    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1480        let inner = self.inner.get_mut();
1481        let len = buf.len();
1482        let slice = VolatileSlice::new(buf);
1483        let read_count = inner.read_cb(
1484            inner.current_offset,
1485            len,
1486            |file, already_read, offset, count| {
1487                let sub_slice = slice.get_slice(already_read, count).unwrap();
1488                match file {
1489                    Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1490                    None => {
1491                        sub_slice.write_bytes(0);
1492                        Ok(())
1493                    }
1494                }
1495            },
1496        )?;
1497        inner.current_offset += read_count as u64;
1498        Ok(read_count)
1499    }
1500}
1501
1502impl Seek for QcowFile {
1503    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
1504        let inner = self.inner.get_mut();
1505        let new_offset: Option<u64> = match pos {
1506            SeekFrom::Start(off) => Some(off),
1507            SeekFrom::End(off) => {
1508                if off < 0 {
1509                    0i64.checked_sub(off)
1510                        .and_then(|increment| inner.virtual_size().checked_sub(increment as u64))
1511                } else {
1512                    inner.virtual_size().checked_add(off as u64)
1513                }
1514            }
1515            SeekFrom::Current(off) => {
1516                if off < 0 {
1517                    0i64.checked_sub(off)
1518                        .and_then(|increment| inner.current_offset.checked_sub(increment as u64))
1519                } else {
1520                    inner.current_offset.checked_add(off as u64)
1521                }
1522            }
1523        };
1524
1525        if let Some(o) = new_offset {
1526            if o <= inner.virtual_size() {
1527                inner.current_offset = o;
1528                return Ok(o);
1529            }
1530        }
1531        Err(std::io::Error::from_raw_os_error(EINVAL))
1532    }
1533}
1534
1535impl Write for QcowFile {
1536    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1537        let inner = self.inner.get_mut();
1538        let write_count = inner.write_cb(
1539            inner.current_offset,
1540            buf.len(),
1541            |file, offset, raw_offset, count| {
1542                file.seek(SeekFrom::Start(raw_offset))?;
1543                file.write_all(&buf[offset..(offset + count)])
1544            },
1545        )?;
1546        inner.current_offset += write_count as u64;
1547        Ok(write_count)
1548    }
1549
1550    fn flush(&mut self) -> std::io::Result<()> {
1551        self.fsync()
1552    }
1553}
1554
1555impl FileReadWriteAtVolatile for QcowFile {
1556    fn read_at_volatile(&self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1557        let mut inner = self.inner.lock();
1558        inner.read_cb(offset, slice.size(), |file, read, offset, count| {
1559            let sub_slice = slice.get_slice(read, count).unwrap();
1560            match file {
1561                Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1562                None => {
1563                    sub_slice.write_bytes(0);
1564                    Ok(())
1565                }
1566            }
1567        })
1568    }
1569
1570    fn write_at_volatile(&self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1571        let mut inner = self.inner.lock();
1572        inner.write_cb(offset, slice.size(), |file, offset, raw_offset, count| {
1573            let sub_slice = slice.get_slice(offset, count).unwrap();
1574            file.write_all_at_volatile(sub_slice, raw_offset)
1575        })
1576    }
1577}
1578
1579impl FileSync for QcowFile {
1580    fn fsync(&self) -> std::io::Result<()> {
1581        let mut inner = self.inner.lock();
1582        inner.sync_caches()?;
1583        let unref_clusters = std::mem::take(&mut inner.unref_clusters);
1584        inner.avail_clusters.extend(unref_clusters);
1585        Ok(())
1586    }
1587
1588    fn fdatasync(&self) -> io::Result<()> {
1589        // QcowFile does not implement fdatasync. Just fall back to fsync.
1590        self.fsync()
1591    }
1592}
1593
1594impl FileSetLen for QcowFile {
1595    fn set_len(&self, _len: u64) -> std::io::Result<()> {
1596        Err(std::io::Error::other(
1597            "set_len() not supported for QcowFile",
1598        ))
1599    }
1600}
1601
1602impl DiskGetLen for QcowFile {
1603    fn get_len(&self) -> io::Result<u64> {
1604        Ok(self.virtual_size)
1605    }
1606}
1607
1608impl FileAllocate for QcowFile {
1609    fn allocate(&self, offset: u64, len: u64) -> io::Result<()> {
1610        let mut inner = self.inner.lock();
1611        // Call write_cb with a do-nothing callback, which will have the effect
1612        // of allocating all clusters in the specified range.
1613        inner.write_cb(
1614            offset,
1615            len as usize,
1616            |_file, _offset, _raw_offset, _count| Ok(()),
1617        )?;
1618        Ok(())
1619    }
1620}
1621
1622impl PunchHole for QcowFile {
1623    fn punch_hole(&self, offset: u64, length: u64) -> std::io::Result<()> {
1624        let mut inner = self.inner.lock();
1625        let mut remaining = length;
1626        let mut offset = offset;
1627        while remaining > 0 {
1628            let chunk_length = min(remaining, usize::MAX as u64) as usize;
1629            inner.zero_bytes(offset, chunk_length)?;
1630            remaining -= chunk_length as u64;
1631            offset += chunk_length as u64;
1632        }
1633        Ok(())
1634    }
1635}
1636
1637impl WriteZeroesAt for QcowFile {
1638    fn write_zeroes_at(&self, offset: u64, length: usize) -> io::Result<usize> {
1639        self.punch_hole(offset, length as u64)?;
1640        Ok(length)
1641    }
1642}
1643
1644impl ToAsyncDisk for QcowFile {
1645    fn to_async_disk(self: Box<Self>, ex: &Executor) -> crate::Result<Box<dyn AsyncDisk>> {
1646        Ok(Box::new(AsyncDiskFileWrapper::new(*self, ex)))
1647    }
1648}
1649
1650// Returns an Error if the given offset doesn't align to a cluster boundary.
1651fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> {
1652    if offset & ((0x01 << cluster_bits) - 1) != 0 {
1653        return Err(Error::InvalidOffset(offset));
1654    }
1655    Ok(())
1656}
1657
1658#[cfg(test)]
1659mod tests {
1660    use std::fs::OpenOptions;
1661    use std::io::Read;
1662    use std::io::Seek;
1663    use std::io::SeekFrom;
1664    use std::io::Write;
1665
1666    use tempfile::tempfile;
1667    use tempfile::TempDir;
1668
1669    use super::*;
1670
1671    fn valid_header() -> Vec<u8> {
1672        vec![
1673            0x51u8, 0x46, 0x49, 0xfb, // magic
1674            0x00, 0x00, 0x00, 0x03, // version
1675            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1676            0x00, 0x00, 0x00, 0x00, // backing file size
1677            0x00, 0x00, 0x00, 0x10, // cluster_bits
1678            0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, // size
1679            0x00, 0x00, 0x00, 0x00, // crypt method
1680            0x00, 0x00, 0x01, 0x00, // L1 size
1681            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1682            0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1683            0x00, 0x00, 0x00, 0x03, // refcount table clusters
1684            0x00, 0x00, 0x00, 0x00, // nb snapshots
1685            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1686            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1687            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1688            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1689            0x00, 0x00, 0x00, 0x04, // refcount_order
1690            0x00, 0x00, 0x00, 0x68, // header_length
1691        ]
1692    }
1693
1694    // Test case found by clusterfuzz to allocate excessive memory.
1695    fn test_huge_header() -> Vec<u8> {
1696        vec![
1697            0x51, 0x46, 0x49, 0xfb, // magic
1698            0x00, 0x00, 0x00, 0x03, // version
1699            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1700            0x00, 0x00, 0x00, 0x00, // backing file size
1701            0x00, 0x00, 0x00, 0x09, // cluster_bits
1702            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // size
1703            0x00, 0x00, 0x00, 0x00, // crypt method
1704            0x00, 0x00, 0x01, 0x00, // L1 size
1705            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1706            0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1707            0x00, 0x00, 0x00, 0x03, // refcount table clusters
1708            0x00, 0x00, 0x00, 0x00, // nb snapshots
1709            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1710            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1711            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1712            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1713            0x00, 0x00, 0x00, 0x04, // refcount_order
1714            0x00, 0x00, 0x00, 0x68, // header_length
1715        ]
1716    }
1717
1718    fn test_params() -> DiskFileParams {
1719        DiskFileParams {
1720            path: PathBuf::from("/foo"),
1721            is_read_only: false,
1722            is_sparse_file: false,
1723            is_overlapped: false,
1724            is_direct: false,
1725            lock: true,
1726            depth: 0,
1727        }
1728    }
1729
1730    fn basic_file(header: &[u8]) -> File {
1731        let mut disk_file = tempfile().expect("failed to create temp file");
1732        disk_file.write_all(header).unwrap();
1733        disk_file.set_len(0x8000_0000).unwrap();
1734        disk_file.seek(SeekFrom::Start(0)).unwrap();
1735        disk_file
1736    }
1737
1738    fn with_basic_file<F>(header: &[u8], mut testfn: F)
1739    where
1740        F: FnMut(File),
1741    {
1742        testfn(basic_file(header)); // File closed when the function exits.
1743    }
1744
1745    fn with_default_file<F>(file_size: u64, mut testfn: F)
1746    where
1747        F: FnMut(QcowFile),
1748    {
1749        let file = tempfile().expect("failed to create temp file");
1750        let qcow_file = QcowFile::new(file, test_params(), file_size).unwrap();
1751
1752        testfn(qcow_file); // File closed when the function exits.
1753    }
1754
1755    // Test helper function to convert a normal slice to a VolatileSlice and write it.
1756    fn write_all_at(qcow: &mut QcowFile, data: &[u8], offset: u64) -> std::io::Result<()> {
1757        let mut mem = data.to_owned();
1758        let vslice = VolatileSlice::new(&mut mem);
1759        qcow.write_all_at_volatile(vslice, offset)
1760    }
1761
1762    // Test helper function to read to a VolatileSlice and copy it to a normal slice.
1763    fn read_exact_at(qcow: &mut QcowFile, data: &mut [u8], offset: u64) -> std::io::Result<()> {
1764        let mut mem = data.to_owned();
1765        let vslice = VolatileSlice::new(&mut mem);
1766        qcow.read_exact_at_volatile(vslice, offset)?;
1767        vslice.copy_to(data);
1768        Ok(())
1769    }
1770
1771    #[test]
1772    fn default_header() {
1773        let header = QcowHeader::create_for_size_and_path(0x10_0000, None);
1774        let mut disk_file = tempfile().expect("failed to create temp file");
1775        header
1776            .expect("Failed to create header.")
1777            .write_to(&mut disk_file)
1778            .expect("Failed to write header to shm.");
1779        disk_file.seek(SeekFrom::Start(0)).unwrap();
1780        QcowFile::from(disk_file, test_params())
1781            .expect("Failed to create Qcow from default Header");
1782    }
1783
1784    #[test]
1785    fn header_read() {
1786        with_basic_file(&valid_header(), |mut disk_file: File| {
1787            QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
1788        });
1789    }
1790
1791    #[test]
1792    fn header_with_backing() {
1793        let header = QcowHeader::create_for_size_and_path(0x10_0000, Some("/my/path/to/a/file"))
1794            .expect("Failed to create header.");
1795        let mut disk_file = tempfile().expect("failed to create temp file");
1796        header
1797            .write_to(&mut disk_file)
1798            .expect("Failed to write header to shm.");
1799        disk_file.seek(SeekFrom::Start(0)).unwrap();
1800        let read_header = QcowHeader::new(&mut disk_file).expect("Failed to create header.");
1801        assert_eq!(
1802            header.backing_file_path,
1803            Some(String::from("/my/path/to/a/file"))
1804        );
1805        assert_eq!(read_header.backing_file_path, header.backing_file_path);
1806    }
1807
1808    #[test]
1809    fn invalid_magic() {
1810        let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb];
1811        with_basic_file(&invalid_header, |mut disk_file: File| {
1812            QcowHeader::new(&mut disk_file).expect_err("Invalid header worked.");
1813        });
1814    }
1815
1816    #[test]
1817    fn invalid_refcount_order() {
1818        let mut header = valid_header();
1819        header[99] = 2;
1820        with_basic_file(&header, |disk_file: File| {
1821            QcowFile::from(disk_file, test_params()).expect_err("Invalid refcount order worked.");
1822        });
1823    }
1824
1825    #[test]
1826    fn invalid_cluster_bits() {
1827        let mut header = valid_header();
1828        header[23] = 3;
1829        with_basic_file(&header, |disk_file: File| {
1830            QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1831        });
1832    }
1833
1834    #[test]
1835    fn test_header_huge_file() {
1836        let header = test_huge_header();
1837        with_basic_file(&header, |disk_file: File| {
1838            QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1839        });
1840    }
1841
1842    #[test]
1843    fn test_header_excessive_file_size_rejected() {
1844        let mut header = valid_header();
1845        header[24..32].copy_from_slice(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e]);
1846        with_basic_file(&header, |disk_file: File| {
1847            QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1848        });
1849    }
1850
1851    #[test]
1852    fn test_huge_l1_table() {
1853        let mut header = valid_header();
1854        header[36] = 0x12;
1855        with_basic_file(&header, |disk_file: File| {
1856            QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1857        });
1858    }
1859
1860    #[test]
1861    fn test_header_1_tb_file_min_cluster() {
1862        let mut header = test_huge_header();
1863        header[24] = 0;
1864        header[26] = 1;
1865        header[31] = 0;
1866        // 1 TB with the min cluster size makes the arrays too big, it should fail.
1867        with_basic_file(&header, |disk_file: File| {
1868            QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1869        });
1870    }
1871
1872    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1873    #[test]
1874    fn test_header_1_tb_file() {
1875        let mut header = test_huge_header();
1876        // reset to 1 TB size.
1877        header[24] = 0;
1878        header[26] = 1;
1879        header[31] = 0;
1880        // set cluster_bits
1881        header[23] = 16;
1882        with_basic_file(&header, |disk_file: File| {
1883            let mut qcow =
1884                QcowFile::from(disk_file, test_params()).expect("Failed to create file.");
1885            let value = 0x0000_0040_3f00_ffffu64;
1886            write_all_at(&mut qcow, &value.to_le_bytes(), 0x100_0000_0000 - 8)
1887                .expect("failed to write data");
1888        });
1889    }
1890
1891    #[test]
1892    fn test_header_huge_num_refcounts() {
1893        let mut header = valid_header();
1894        header[56..60].copy_from_slice(&[0x02, 0x00, 0xe8, 0xff]);
1895        with_basic_file(&header, |disk_file: File| {
1896            QcowFile::from(disk_file, test_params())
1897                .expect_err("Created disk with excessive refcount clusters");
1898        });
1899    }
1900
1901    #[test]
1902    fn test_header_huge_refcount_offset() {
1903        let mut header = valid_header();
1904        header[48..56].copy_from_slice(&[0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x02, 0x00]);
1905        with_basic_file(&header, |disk_file: File| {
1906            QcowFile::from(disk_file, test_params())
1907                .expect_err("Created disk with excessive refcount offset");
1908        });
1909    }
1910
1911    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1912    #[test]
1913    fn write_read_start() {
1914        with_basic_file(&valid_header(), |disk_file: File| {
1915            let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1916            write_all_at(&mut q, b"test first bytes", 0).expect("Failed to write test string.");
1917            let mut buf = [0u8; 4];
1918            read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
1919            assert_eq!(&buf, b"test");
1920        });
1921    }
1922
1923    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1924    #[test]
1925    fn write_read_start_backing() {
1926        let disk_file = basic_file(&valid_header());
1927        let mut backing = QcowFile::from(disk_file, test_params()).unwrap();
1928        write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1929        let mut buf = [0u8; 4];
1930        let wrapping_disk_file = basic_file(&valid_header());
1931        let mut wrapping = QcowFile::from(wrapping_disk_file, test_params()).unwrap();
1932        wrapping.set_backing_file(Some(Box::new(backing)));
1933        read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1934        assert_eq!(&buf, b"test");
1935    }
1936
1937    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1938    #[test]
1939    fn write_read_start_backing_overlap() {
1940        let disk_file = basic_file(&valid_header());
1941        let mut backing = QcowFile::from(disk_file, test_params()).unwrap();
1942        write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1943        let wrapping_disk_file = basic_file(&valid_header());
1944        let mut wrapping = QcowFile::from(wrapping_disk_file, test_params()).unwrap();
1945        wrapping.set_backing_file(Some(Box::new(backing)));
1946        write_all_at(&mut wrapping, b"TEST", 0).expect("Failed to write second test string.");
1947        let mut buf = [0u8; 10];
1948        read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1949        assert_eq!(&buf, b"TEST first");
1950    }
1951
1952    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1953    #[test]
1954    fn offset_write_read() {
1955        with_basic_file(&valid_header(), |disk_file: File| {
1956            let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1957            let b = [0x55u8; 0x1000];
1958            write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1959            let mut buf = [0u8; 4];
1960            read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1961            assert_eq!(buf[0], 0x55);
1962        });
1963    }
1964
1965    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1966    #[test]
1967    fn write_zeroes_read() {
1968        with_basic_file(&valid_header(), |disk_file: File| {
1969            let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1970            // Write some test data.
1971            let b = [0x55u8; 0x1000];
1972            write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1973            // Overwrite the test data with zeroes.
1974            q.write_zeroes_all_at(0xfff2000, 0x200)
1975                .expect("Failed to write zeroes.");
1976            // Verify that the correct part of the data was zeroed out.
1977            let mut buf = [0u8; 0x1000];
1978            read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1979            assert_eq!(buf[0], 0);
1980            assert_eq!(buf[0x1FF], 0);
1981            assert_eq!(buf[0x200], 0x55);
1982            assert_eq!(buf[0xFFF], 0x55);
1983        });
1984    }
1985
1986    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1987    #[test]
1988    fn write_zeroes_full_cluster() {
1989        // Choose a size that is larger than a cluster.
1990        // valid_header uses cluster_bits = 12, which corresponds to a cluster size of 4096.
1991        const CHUNK_SIZE: usize = 4096 * 2 + 512;
1992        with_basic_file(&valid_header(), |disk_file: File| {
1993            let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1994            // Write some test data.
1995            let b = [0x55u8; CHUNK_SIZE];
1996            write_all_at(&mut q, &b, 0).expect("Failed to write test string.");
1997            // Overwrite the full cluster with zeroes.
1998            q.write_zeroes_all_at(0, CHUNK_SIZE)
1999                .expect("Failed to write zeroes.");
2000            // Verify that the data was zeroed out.
2001            let mut buf = [0u8; CHUNK_SIZE];
2002            read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
2003            assert_eq!(buf[0], 0);
2004            assert_eq!(buf[CHUNK_SIZE - 1], 0);
2005        });
2006    }
2007
2008    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2009    #[test]
2010    fn write_zeroes_backing() {
2011        let disk_file = basic_file(&valid_header());
2012        let mut backing = QcowFile::from(disk_file, test_params()).unwrap();
2013        // Write some test data.
2014        let b = [0x55u8; 0x1000];
2015        write_all_at(&mut backing, &b, 0xfff2000).expect("Failed to write test string.");
2016        let wrapping_disk_file = basic_file(&valid_header());
2017        let mut wrapping = QcowFile::from(wrapping_disk_file, test_params()).unwrap();
2018        wrapping.set_backing_file(Some(Box::new(backing)));
2019        // Overwrite the test data with zeroes.
2020        // This should allocate new clusters in the wrapping file so that they can be zeroed.
2021        wrapping
2022            .write_zeroes_all_at(0xfff2000, 0x200)
2023            .expect("Failed to write zeroes.");
2024        // Verify that the correct part of the data was zeroed out.
2025        let mut buf = [0u8; 0x1000];
2026        read_exact_at(&mut wrapping, &mut buf, 0xfff2000).expect("Failed to read.");
2027        assert_eq!(buf[0], 0);
2028        assert_eq!(buf[0x1FF], 0);
2029        assert_eq!(buf[0x200], 0x55);
2030        assert_eq!(buf[0xFFF], 0x55);
2031    }
2032    #[test]
2033    fn test_header() {
2034        with_basic_file(&valid_header(), |disk_file: File| {
2035            let mut q = QcowFile::from(disk_file, test_params()).unwrap();
2036            assert_eq!(q.inner.get_mut().virtual_size(), 0x20_0000_0000);
2037        });
2038    }
2039
2040    #[test]
2041    fn read_small_buffer() {
2042        with_basic_file(&valid_header(), |disk_file: File| {
2043            let mut q = QcowFile::from(disk_file, test_params()).unwrap();
2044            let mut b = [5u8; 16];
2045            read_exact_at(&mut q, &mut b, 1000).expect("Failed to read.");
2046            assert_eq!(0, b[0]);
2047            assert_eq!(0, b[15]);
2048        });
2049    }
2050
2051    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2052    #[test]
2053    fn replay_ext4() {
2054        with_basic_file(&valid_header(), |disk_file: File| {
2055            let mut q = QcowFile::from(disk_file, test_params()).unwrap();
2056            const BUF_SIZE: usize = 0x1000;
2057            let mut b = [0u8; BUF_SIZE];
2058
2059            struct Transfer {
2060                pub write: bool,
2061                pub addr: u64,
2062            }
2063
2064            // Write transactions from mkfs.ext4.
2065            let xfers: Vec<Transfer> = vec![
2066                Transfer {
2067                    write: false,
2068                    addr: 0xfff0000,
2069                },
2070                Transfer {
2071                    write: false,
2072                    addr: 0xfffe000,
2073                },
2074                Transfer {
2075                    write: false,
2076                    addr: 0x0,
2077                },
2078                Transfer {
2079                    write: false,
2080                    addr: 0x1000,
2081                },
2082                Transfer {
2083                    write: false,
2084                    addr: 0xffff000,
2085                },
2086                Transfer {
2087                    write: false,
2088                    addr: 0xffdf000,
2089                },
2090                Transfer {
2091                    write: false,
2092                    addr: 0xfff8000,
2093                },
2094                Transfer {
2095                    write: false,
2096                    addr: 0xffe0000,
2097                },
2098                Transfer {
2099                    write: false,
2100                    addr: 0xffce000,
2101                },
2102                Transfer {
2103                    write: false,
2104                    addr: 0xffb6000,
2105                },
2106                Transfer {
2107                    write: false,
2108                    addr: 0xffab000,
2109                },
2110                Transfer {
2111                    write: false,
2112                    addr: 0xffa4000,
2113                },
2114                Transfer {
2115                    write: false,
2116                    addr: 0xff8e000,
2117                },
2118                Transfer {
2119                    write: false,
2120                    addr: 0xff86000,
2121                },
2122                Transfer {
2123                    write: false,
2124                    addr: 0xff84000,
2125                },
2126                Transfer {
2127                    write: false,
2128                    addr: 0xff89000,
2129                },
2130                Transfer {
2131                    write: false,
2132                    addr: 0xfe7e000,
2133                },
2134                Transfer {
2135                    write: false,
2136                    addr: 0x100000,
2137                },
2138                Transfer {
2139                    write: false,
2140                    addr: 0x3000,
2141                },
2142                Transfer {
2143                    write: false,
2144                    addr: 0x7000,
2145                },
2146                Transfer {
2147                    write: false,
2148                    addr: 0xf000,
2149                },
2150                Transfer {
2151                    write: false,
2152                    addr: 0x2000,
2153                },
2154                Transfer {
2155                    write: false,
2156                    addr: 0x4000,
2157                },
2158                Transfer {
2159                    write: false,
2160                    addr: 0x5000,
2161                },
2162                Transfer {
2163                    write: false,
2164                    addr: 0x6000,
2165                },
2166                Transfer {
2167                    write: false,
2168                    addr: 0x8000,
2169                },
2170                Transfer {
2171                    write: false,
2172                    addr: 0x9000,
2173                },
2174                Transfer {
2175                    write: false,
2176                    addr: 0xa000,
2177                },
2178                Transfer {
2179                    write: false,
2180                    addr: 0xb000,
2181                },
2182                Transfer {
2183                    write: false,
2184                    addr: 0xc000,
2185                },
2186                Transfer {
2187                    write: false,
2188                    addr: 0xd000,
2189                },
2190                Transfer {
2191                    write: false,
2192                    addr: 0xe000,
2193                },
2194                Transfer {
2195                    write: false,
2196                    addr: 0x10000,
2197                },
2198                Transfer {
2199                    write: false,
2200                    addr: 0x11000,
2201                },
2202                Transfer {
2203                    write: false,
2204                    addr: 0x12000,
2205                },
2206                Transfer {
2207                    write: false,
2208                    addr: 0x13000,
2209                },
2210                Transfer {
2211                    write: false,
2212                    addr: 0x14000,
2213                },
2214                Transfer {
2215                    write: false,
2216                    addr: 0x15000,
2217                },
2218                Transfer {
2219                    write: false,
2220                    addr: 0x16000,
2221                },
2222                Transfer {
2223                    write: false,
2224                    addr: 0x17000,
2225                },
2226                Transfer {
2227                    write: false,
2228                    addr: 0x18000,
2229                },
2230                Transfer {
2231                    write: false,
2232                    addr: 0x19000,
2233                },
2234                Transfer {
2235                    write: false,
2236                    addr: 0x1a000,
2237                },
2238                Transfer {
2239                    write: false,
2240                    addr: 0x1b000,
2241                },
2242                Transfer {
2243                    write: false,
2244                    addr: 0x1c000,
2245                },
2246                Transfer {
2247                    write: false,
2248                    addr: 0x1d000,
2249                },
2250                Transfer {
2251                    write: false,
2252                    addr: 0x1e000,
2253                },
2254                Transfer {
2255                    write: false,
2256                    addr: 0x1f000,
2257                },
2258                Transfer {
2259                    write: false,
2260                    addr: 0x21000,
2261                },
2262                Transfer {
2263                    write: false,
2264                    addr: 0x22000,
2265                },
2266                Transfer {
2267                    write: false,
2268                    addr: 0x24000,
2269                },
2270                Transfer {
2271                    write: false,
2272                    addr: 0x40000,
2273                },
2274                Transfer {
2275                    write: false,
2276                    addr: 0x0,
2277                },
2278                Transfer {
2279                    write: false,
2280                    addr: 0x3000,
2281                },
2282                Transfer {
2283                    write: false,
2284                    addr: 0x7000,
2285                },
2286                Transfer {
2287                    write: false,
2288                    addr: 0x0,
2289                },
2290                Transfer {
2291                    write: false,
2292                    addr: 0x1000,
2293                },
2294                Transfer {
2295                    write: false,
2296                    addr: 0x2000,
2297                },
2298                Transfer {
2299                    write: false,
2300                    addr: 0x3000,
2301                },
2302                Transfer {
2303                    write: false,
2304                    addr: 0x0,
2305                },
2306                Transfer {
2307                    write: false,
2308                    addr: 0x449000,
2309                },
2310                Transfer {
2311                    write: false,
2312                    addr: 0x48000,
2313                },
2314                Transfer {
2315                    write: false,
2316                    addr: 0x48000,
2317                },
2318                Transfer {
2319                    write: false,
2320                    addr: 0x448000,
2321                },
2322                Transfer {
2323                    write: false,
2324                    addr: 0x44a000,
2325                },
2326                Transfer {
2327                    write: false,
2328                    addr: 0x48000,
2329                },
2330                Transfer {
2331                    write: false,
2332                    addr: 0x48000,
2333                },
2334                Transfer {
2335                    write: true,
2336                    addr: 0x0,
2337                },
2338                Transfer {
2339                    write: true,
2340                    addr: 0x448000,
2341                },
2342                Transfer {
2343                    write: true,
2344                    addr: 0x449000,
2345                },
2346                Transfer {
2347                    write: true,
2348                    addr: 0x44a000,
2349                },
2350                Transfer {
2351                    write: true,
2352                    addr: 0xfff0000,
2353                },
2354                Transfer {
2355                    write: true,
2356                    addr: 0xfff1000,
2357                },
2358                Transfer {
2359                    write: true,
2360                    addr: 0xfff2000,
2361                },
2362                Transfer {
2363                    write: true,
2364                    addr: 0xfff3000,
2365                },
2366                Transfer {
2367                    write: true,
2368                    addr: 0xfff4000,
2369                },
2370                Transfer {
2371                    write: true,
2372                    addr: 0xfff5000,
2373                },
2374                Transfer {
2375                    write: true,
2376                    addr: 0xfff6000,
2377                },
2378                Transfer {
2379                    write: true,
2380                    addr: 0xfff7000,
2381                },
2382                Transfer {
2383                    write: true,
2384                    addr: 0xfff8000,
2385                },
2386                Transfer {
2387                    write: true,
2388                    addr: 0xfff9000,
2389                },
2390                Transfer {
2391                    write: true,
2392                    addr: 0xfffa000,
2393                },
2394                Transfer {
2395                    write: true,
2396                    addr: 0xfffb000,
2397                },
2398                Transfer {
2399                    write: true,
2400                    addr: 0xfffc000,
2401                },
2402                Transfer {
2403                    write: true,
2404                    addr: 0xfffd000,
2405                },
2406                Transfer {
2407                    write: true,
2408                    addr: 0xfffe000,
2409                },
2410                Transfer {
2411                    write: true,
2412                    addr: 0xffff000,
2413                },
2414            ];
2415
2416            for xfer in &xfers {
2417                if xfer.write {
2418                    write_all_at(&mut q, &b, xfer.addr).expect("Failed to write.");
2419                } else {
2420                    read_exact_at(&mut q, &mut b, xfer.addr).expect("Failed to read.");
2421                }
2422            }
2423        });
2424    }
2425
2426    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2427    #[test]
2428    fn combo_write_read() {
2429        with_default_file(1024 * 1024 * 1024 * 256, |mut qcow_file| {
2430            const NUM_BLOCKS: usize = 55;
2431            const BLOCK_SIZE: usize = 0x1_0000;
2432            const OFFSET: u64 = 0x1_0000_0020;
2433            let data = [0x55u8; BLOCK_SIZE];
2434            let mut readback = [0u8; BLOCK_SIZE];
2435            for i in 0..NUM_BLOCKS {
2436                let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2437                write_all_at(&mut qcow_file, &data, seek_offset)
2438                    .expect("Failed to write test data.");
2439                // Read back the data to check it was written correctly.
2440                read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2441                for (orig, read) in data.iter().zip(readback.iter()) {
2442                    assert_eq!(orig, read);
2443                }
2444            }
2445            // Check that address 0 is still zeros.
2446            read_exact_at(&mut qcow_file, &mut readback, 0).expect("Failed to read.");
2447            for read in readback.iter() {
2448                assert_eq!(*read, 0);
2449            }
2450            // Check the data again after the writes have happened.
2451            for i in 0..NUM_BLOCKS {
2452                let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2453                read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2454                for (orig, read) in data.iter().zip(readback.iter()) {
2455                    assert_eq!(orig, read);
2456                }
2457            }
2458
2459            assert_eq!(
2460                qcow_file.inner.get_mut().first_zero_refcount().unwrap(),
2461                None
2462            );
2463        });
2464    }
2465
2466    #[test]
2467    fn rebuild_refcounts() {
2468        with_basic_file(&valid_header(), |mut disk_file: File| {
2469            let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
2470            let cluster_size = 65536;
2471            let mut raw_file =
2472                QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile.");
2473            QcowFileInner::rebuild_refcounts(&mut raw_file, header)
2474                .expect("Failed to rebuild recounts.");
2475        });
2476    }
2477
2478    #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2479    #[test]
2480    fn nested_qcow() {
2481        let tmp_dir = TempDir::new().unwrap();
2482
2483        // A file `backing` is backing a qcow file `qcow.l1`, which in turn is backing another
2484        // qcow file.
2485        let backing_file_path = tmp_dir.path().join("backing");
2486        let _backing_file = OpenOptions::new()
2487            .read(true)
2488            .write(true)
2489            .create_new(true)
2490            .open(&backing_file_path)
2491            .unwrap();
2492
2493        let level1_qcow_file_path = tmp_dir.path().join("qcow.l1");
2494        let level1_qcow_file = OpenOptions::new()
2495            .read(true)
2496            .write(true)
2497            .create_new(true)
2498            .open(&level1_qcow_file_path)
2499            .unwrap();
2500        let _level1_qcow_file = QcowFile::new_from_backing(
2501            level1_qcow_file,
2502            test_params(),
2503            backing_file_path.to_str().unwrap(),
2504        )
2505        .unwrap();
2506
2507        let level2_qcow_file = tempfile().unwrap();
2508        let _level2_qcow_file = QcowFile::new_from_backing(
2509            level2_qcow_file,
2510            test_params(),
2511            level1_qcow_file_path.to_str().unwrap(),
2512        )
2513        .expect("failed to create level2 qcow file");
2514    }
2515
2516    #[test]
2517    fn io_seek() {
2518        with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2519            // Cursor should start at 0.
2520            assert_eq!(qcow_file.stream_position().unwrap(), 0);
2521
2522            // Seek 1 MB from start.
2523            assert_eq!(
2524                qcow_file.seek(SeekFrom::Start(1024 * 1024)).unwrap(),
2525                1024 * 1024
2526            );
2527
2528            // Rewind 1 MB + 1 byte (past beginning) - seeking to a negative offset is an error and
2529            // should not move the cursor.
2530            qcow_file
2531                .seek(SeekFrom::Current(-(1024 * 1024 + 1)))
2532                .expect_err("negative offset seek should fail");
2533            assert_eq!(qcow_file.stream_position().unwrap(), 1024 * 1024);
2534
2535            // Seek to last byte.
2536            assert_eq!(
2537                qcow_file.seek(SeekFrom::End(-1)).unwrap(),
2538                1024 * 1024 * 10 - 1
2539            );
2540
2541            // Seek to EOF.
2542            assert_eq!(qcow_file.seek(SeekFrom::End(0)).unwrap(), 1024 * 1024 * 10);
2543
2544            // Seek past EOF is not allowed.
2545            qcow_file
2546                .seek(SeekFrom::End(1))
2547                .expect_err("seek past EOF should fail");
2548        });
2549    }
2550
2551    #[test]
2552    fn io_write_read() {
2553        with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2554            const BLOCK_SIZE: usize = 0x1_0000;
2555            let data_55 = [0x55u8; BLOCK_SIZE];
2556            let data_aa = [0xaau8; BLOCK_SIZE];
2557            let mut readback = [0u8; BLOCK_SIZE];
2558
2559            qcow_file.write_all(&data_55).unwrap();
2560            assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64);
2561
2562            qcow_file.write_all(&data_aa).unwrap();
2563            assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64 * 2);
2564
2565            // Read BLOCK_SIZE of just 0xaa.
2566            assert_eq!(
2567                qcow_file
2568                    .seek(SeekFrom::Current(-(BLOCK_SIZE as i64)))
2569                    .unwrap(),
2570                BLOCK_SIZE as u64
2571            );
2572            qcow_file.read_exact(&mut readback).unwrap();
2573            assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64 * 2);
2574            for (orig, read) in data_aa.iter().zip(readback.iter()) {
2575                assert_eq!(orig, read);
2576            }
2577
2578            // Read BLOCK_SIZE of just 0x55.
2579            qcow_file.rewind().unwrap();
2580            qcow_file.read_exact(&mut readback).unwrap();
2581            for (orig, read) in data_55.iter().zip(readback.iter()) {
2582                assert_eq!(orig, read);
2583            }
2584
2585            // Read BLOCK_SIZE crossing between the block of 0x55 and 0xaa.
2586            qcow_file
2587                .seek(SeekFrom::Start(BLOCK_SIZE as u64 / 2))
2588                .unwrap();
2589            qcow_file.read_exact(&mut readback).unwrap();
2590            for (orig, read) in data_55[BLOCK_SIZE / 2..]
2591                .iter()
2592                .chain(data_aa[..BLOCK_SIZE / 2].iter())
2593                .zip(readback.iter())
2594            {
2595                assert_eq!(orig, read);
2596            }
2597        });
2598    }
2599}