disk/
disk.rs

1// Copyright 2019 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! VM disk image file format I/O.
6
7use std::cmp::min;
8use std::fmt::Debug;
9use std::fs::File;
10use std::io;
11use std::io::Seek;
12use std::io::SeekFrom;
13use std::path::PathBuf;
14use std::sync::Arc;
15
16use async_trait::async_trait;
17use base::info;
18use base::AsRawDescriptors;
19use base::FileAllocate;
20use base::FileReadWriteAtVolatile;
21use base::FileSetLen;
22use cros_async::BackingMemory;
23use cros_async::Executor;
24use cros_async::IoSource;
25use cros_async::MemRegionIter;
26use thiserror::Error as ThisError;
27
28mod asynchronous;
29#[allow(unused)]
30pub(crate) use asynchronous::AsyncDiskFileWrapper;
31#[cfg(feature = "qcow")]
32mod qcow;
33#[cfg(feature = "qcow")]
34pub use qcow::QcowFile;
35#[cfg(feature = "qcow")]
36pub use qcow::QCOW_MAGIC;
37mod sys;
38
39#[cfg(feature = "composite-disk")]
40mod composite;
41#[cfg(feature = "composite-disk")]
42use composite::CompositeDiskFile;
43#[cfg(feature = "composite-disk")]
44use composite::CDISK_MAGIC;
45#[cfg(feature = "composite-disk")]
46mod gpt;
47#[cfg(feature = "composite-disk")]
48pub use composite::create_composite_disk;
49#[cfg(feature = "composite-disk")]
50pub use composite::create_zero_filler;
51#[cfg(feature = "composite-disk")]
52pub use composite::Error as CompositeError;
53#[cfg(feature = "composite-disk")]
54pub use composite::ImagePartitionType;
55#[cfg(feature = "composite-disk")]
56pub use composite::PartitionInfo;
57#[cfg(feature = "composite-disk")]
58pub use gpt::Error as GptError;
59
60#[cfg(feature = "android-sparse")]
61mod android_sparse;
62#[cfg(feature = "android-sparse")]
63use android_sparse::AndroidSparse;
64#[cfg(feature = "android-sparse")]
65use android_sparse::SPARSE_HEADER_MAGIC;
66use sys::read_from_disk;
67
68#[cfg(feature = "zstd")]
69mod zstd;
70#[cfg(feature = "zstd")]
71use zstd::ZstdDisk;
72#[cfg(feature = "zstd")]
73use zstd::ZSTD_FRAME_MAGIC;
74#[cfg(feature = "zstd")]
75use zstd::ZSTD_SKIPPABLE_MAGIC_HIGH;
76#[cfg(feature = "zstd")]
77use zstd::ZSTD_SKIPPABLE_MAGIC_LOW;
78
/// Deepest level to which disk formats that open other disk files may be nested.
///
/// `open_disk_file` rejects any request whose `depth` is greater than this value.
const MAX_NESTING_DEPTH: u32 = 10;
81
82#[derive(ThisError, Debug)]
83pub enum Error {
84    #[error("failed to create block device: {0}")]
85    BlockDeviceNew(base::Error),
86    #[error("requested file conversion not supported")]
87    ConversionNotSupported,
88    #[cfg(feature = "android-sparse")]
89    #[error("failure in android sparse disk: {0}")]
90    CreateAndroidSparseDisk(android_sparse::Error),
91    #[cfg(feature = "composite-disk")]
92    #[error("failure in composite disk: {0}")]
93    CreateCompositeDisk(composite::Error),
94    #[cfg(feature = "zstd")]
95    #[error("failure in zstd disk: {0}")]
96    CreateZstdDisk(anyhow::Error),
97    #[error("failure creating single file disk: {0}")]
98    CreateSingleFileDisk(cros_async::AsyncError),
99    #[error("failed to set O_DIRECT on disk image: {0}")]
100    DirectFailed(base::Error),
101    #[error("failure with fdatasync: {0}")]
102    Fdatasync(cros_async::AsyncError),
103    #[error("failure with fsync: {0}")]
104    Fsync(cros_async::AsyncError),
105    #[error("failed to lock file: {0}")]
106    LockFileFailure(base::Error),
107    #[error("failure with fdatasync: {0}")]
108    IoFdatasync(io::Error),
109    #[error("failure with flush: {0}")]
110    IoFlush(io::Error),
111    #[error("failure with fsync: {0}")]
112    IoFsync(io::Error),
113    #[error("failure to punch hole: {0}")]
114    IoPunchHole(io::Error),
115    #[error("checking host fs type: {0}")]
116    HostFsType(base::Error),
117    #[error("maximum disk nesting depth exceeded")]
118    MaxNestingDepthExceeded,
119    #[error("failed to open disk file \"{0}\": {1}")]
120    OpenFile(String, base::Error),
121    #[error("failure to punch hole: {0}")]
122    PunchHole(cros_async::AsyncError),
123    #[error("failure to punch hole for block device file: {0}")]
124    PunchHoleBlockDeviceFile(base::Error),
125    #[cfg(feature = "qcow")]
126    #[error("failure in qcow: {0}")]
127    QcowError(qcow::Error),
128    #[error("failed to read data: {0}")]
129    ReadingData(io::Error),
130    #[error("failed to read header: {0}")]
131    ReadingHeader(io::Error),
132    #[error("failed to read to memory: {0}")]
133    ReadToMem(cros_async::AsyncError),
134    #[error("failed to seek file: {0}")]
135    SeekingFile(io::Error),
136    #[error("failed to set file size: {0}")]
137    SettingFileSize(io::Error),
138    #[error("unknown disk type")]
139    UnknownType,
140    #[error("failed to write from memory: {0}")]
141    WriteFromMem(cros_async::AsyncError),
142    #[error("failed to write from vec: {0}")]
143    WriteFromVec(cros_async::AsyncError),
144    #[error("failed to write zeroes: {0}")]
145    WriteZeroes(io::Error),
146    #[error("failed to write data: {0}")]
147    WritingData(io::Error),
148    #[error("failed to convert to async: {0}")]
149    ToAsync(cros_async::AsyncError),
150    #[cfg(windows)]
151    #[error("failed to set disk file sparse: {0}")]
152    SetSparseFailure(io::Error),
153    #[error("failure with guest memory access: {0}")]
154    GuestMemory(cros_async::mem::Error),
155    #[error("unsupported operation")]
156    UnsupportedOperation,
157}
158
159pub type Result<T> = std::result::Result<T, Error>;
160
/// Length query for a disk image or raw block device.
pub trait DiskGetLen {
    /// Returns the disk's current length, measured in bytes.
    ///
    /// # Errors
    ///
    /// Returns the underlying `io::Error` if the length cannot be determined.
    fn get_len(&self) -> io::Result<u64>;
}
166
167impl DiskGetLen for File {
168    fn get_len(&self) -> io::Result<u64> {
169        let mut s = self;
170        let orig_seek = s.stream_position()?;
171        let end = s.seek(SeekFrom::End(0))?;
172        s.seek(SeekFrom::Start(orig_seek))?;
173        Ok(end)
174    }
175}
176
177/// The prerequisites necessary to support a block device.
178pub trait DiskFile:
179    FileSetLen + DiskGetLen + FileReadWriteAtVolatile + ToAsyncDisk + Send + AsRawDescriptors + Debug
180{
181    /// Creates a new DiskFile instance that shares the same underlying disk file image. IO
182    /// operations to a DiskFile should affect all DiskFile instances with the same underlying disk
183    /// file image.
184    ///
185    /// `try_clone()` returns [`io::ErrorKind::Unsupported`] Error if a DiskFile does not support
186    /// creating an instance with the same underlying disk file image.
187    fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
188        Err(io::Error::new(
189            io::ErrorKind::Unsupported,
190            "unsupported operation",
191        ))
192    }
193}
194
195/// A `DiskFile` that can be converted for asychronous access.
196pub trait ToAsyncDisk: AsRawDescriptors + DiskGetLen + Send {
197    /// Convert a boxed self in to a box-wrapped implementaiton of AsyncDisk.
198    /// Used to convert a standard disk image to an async disk image. This conversion and the
199    /// inverse are needed so that the `Send` DiskImage can be given to the block thread where it is
200    /// converted to a non-`Send` AsyncDisk. The AsyncDisk can then be converted back and returned
201    /// to the main device thread if the block device is destroyed or reset.
202    fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>;
203}
204
205impl ToAsyncDisk for File {
206    fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>> {
207        Ok(Box::new(SingleFileDisk::new(*self, ex)?))
208    }
209}
210
/// The kinds of host image file that can serve as a virtual disk.
#[derive(Debug, Eq, PartialEq)]
pub enum ImageType {
    /// No recognized format header; the file is treated as a raw image.
    Raw,
    /// The header carries the qcow magic.
    Qcow2,
    /// The header carries the composite disk magic.
    CompositeDisk,
    /// The header carries the Android sparse image magic.
    AndroidSparse,
    /// The header carries a zstd frame magic or a zstd skippable-frame magic.
    Zstd,
}
220
221/// Detect the type of an image file by checking for a valid header of the supported formats.
222pub fn detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType> {
223    let mut f = file;
224    let disk_size = f.get_len().map_err(Error::SeekingFile)?;
225    let orig_seek = f.stream_position().map_err(Error::SeekingFile)?;
226
227    info!("disk size {}", disk_size);
228
229    // Try to read the disk in a nicely-aligned block size unless the whole file is smaller.
230    const MAGIC_BLOCK_SIZE: usize = 4096;
231    #[repr(align(4096))]
232    struct BlockAlignedBuffer {
233        data: [u8; MAGIC_BLOCK_SIZE],
234    }
235    let mut magic = BlockAlignedBuffer {
236        data: [0u8; MAGIC_BLOCK_SIZE],
237    };
238    let magic_read_len = if disk_size > MAGIC_BLOCK_SIZE as u64 {
239        MAGIC_BLOCK_SIZE
240    } else {
241        // This cast is safe since we know disk_size is less than MAGIC_BLOCK_SIZE (4096) and
242        // therefore is representable in usize.
243        disk_size as usize
244    };
245
246    read_from_disk(f, 0, &mut magic.data[0..magic_read_len], overlapped_mode)?;
247    f.seek(SeekFrom::Start(orig_seek))
248        .map_err(Error::SeekingFile)?;
249
250    #[cfg(feature = "composite-disk")]
251    if let Some(cdisk_magic) = magic.data.get(0..CDISK_MAGIC.len()) {
252        if cdisk_magic == CDISK_MAGIC.as_bytes() {
253            return Ok(ImageType::CompositeDisk);
254        }
255    }
256
257    #[allow(unused_variables)] // magic4 is only used with the qcow/android-sparse/zstd features.
258    if let Some(magic4) = magic
259        .data
260        .get(0..4)
261        .and_then(|v| <&[u8] as std::convert::TryInto<[u8; 4]>>::try_into(v).ok())
262    {
263        #[cfg(feature = "qcow")]
264        if magic4 == QCOW_MAGIC.to_be_bytes() {
265            return Ok(ImageType::Qcow2);
266        }
267        #[cfg(feature = "android-sparse")]
268        if magic4 == SPARSE_HEADER_MAGIC.to_le_bytes() {
269            return Ok(ImageType::AndroidSparse);
270        }
271        #[cfg(feature = "zstd")]
272        if u32::from_le_bytes(magic4) == ZSTD_FRAME_MAGIC
273            || (u32::from_le_bytes(magic4) >= ZSTD_SKIPPABLE_MAGIC_LOW
274                && u32::from_le_bytes(magic4) <= ZSTD_SKIPPABLE_MAGIC_HIGH)
275        {
276            return Ok(ImageType::Zstd);
277        }
278    }
279
280    Ok(ImageType::Raw)
281}
282
283impl DiskFile for File {
284    fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
285        Ok(Box::new(self.try_clone()?))
286    }
287}
288
/// Options describing how a disk image file should be opened.
pub struct DiskFileParams {
    /// Location of the disk image file.
    pub path: PathBuf,
    /// Whether the file is read-only.
    pub is_read_only: bool,
    /// Whether to call `base::set_sparse_file` on the file. Currently only affects Windows and
    /// is irrelevant for read only files.
    pub is_sparse_file: bool,
    /// Whether to open the file in overlapped mode. Only affects Windows.
    pub is_overlapped: bool,
    /// Whether to disable OS page caches / buffering.
    pub is_direct: bool,
    /// Whether to lock the file.
    pub lock: bool,
    /// The nesting depth of the file. Used to avoid infinite recursion. Users outside the disk
    /// crate should set this to zero.
    pub depth: u32,
}
305
306/// Inspect the image file type and create an appropriate disk file to match it.
307pub fn open_disk_file(params: DiskFileParams) -> Result<Box<dyn DiskFile>> {
308    if params.depth > MAX_NESTING_DEPTH {
309        return Err(Error::MaxNestingDepthExceeded);
310    }
311
312    let raw_image = sys::open_raw_disk_image(&params)?;
313    let image_type = detect_image_type(&raw_image, params.is_overlapped)?;
314    Ok(match image_type {
315        ImageType::Raw => {
316            sys::apply_raw_disk_file_options(&raw_image, params.is_sparse_file)?;
317            Box::new(raw_image) as Box<dyn DiskFile>
318        }
319        #[cfg(feature = "qcow")]
320        ImageType::Qcow2 => Box::new(QcowFile::from(raw_image, params).map_err(Error::QcowError)?)
321            as Box<dyn DiskFile>,
322        #[cfg(feature = "composite-disk")]
323        ImageType::CompositeDisk => {
324            // Valid composite disk header present
325            Box::new(
326                CompositeDiskFile::from_file(raw_image, params)
327                    .map_err(Error::CreateCompositeDisk)?,
328            ) as Box<dyn DiskFile>
329        }
330        #[cfg(feature = "android-sparse")]
331        ImageType::AndroidSparse => {
332            Box::new(AndroidSparse::from_file(raw_image).map_err(Error::CreateAndroidSparseDisk)?)
333                as Box<dyn DiskFile>
334        }
335        #[cfg(feature = "zstd")]
336        ImageType::Zstd => Box::new(ZstdDisk::from_file(raw_image).map_err(Error::CreateZstdDisk)?)
337            as Box<dyn DiskFile>,
338        #[allow(unreachable_patterns)]
339        _ => return Err(Error::UnknownType),
340    })
341}
342
343/// An asynchronously accessible disk.
344#[async_trait(?Send)]
345pub trait AsyncDisk: DiskGetLen + FileSetLen + FileAllocate {
346    /// Flush intermediary buffers and/or dirty state to file. fsync not required.
347    async fn flush(&self) -> Result<()>;
348
349    /// Asynchronously fsyncs any completed operations to the disk.
350    async fn fsync(&self) -> Result<()>;
351
352    /// Asynchronously fdatasyncs any completed operations to the disk.
353    /// Note that an implementation may simply call fsync for fdatasync.
354    async fn fdatasync(&self) -> Result<()>;
355
356    /// Reads from the file at 'file_offset' into memory `mem` at `mem_offsets`.
357    /// `mem_offsets` is similar to an iovec except relative to the start of `mem`.
358    async fn read_to_mem<'a>(
359        &'a self,
360        file_offset: u64,
361        mem: Arc<dyn BackingMemory + Send + Sync>,
362        mem_offsets: cros_async::MemRegionIter<'a>,
363    ) -> Result<usize>;
364
365    /// Writes to the file at 'file_offset' from memory `mem` at `mem_offsets`.
366    async fn write_from_mem<'a>(
367        &'a self,
368        file_offset: u64,
369        mem: Arc<dyn BackingMemory + Send + Sync>,
370        mem_offsets: cros_async::MemRegionIter<'a>,
371    ) -> Result<usize>;
372
373    /// Replaces a range of bytes with a hole.
374    async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()>;
375
376    /// Writes up to `length` bytes of zeroes to the stream, returning how many bytes were written.
377    async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>;
378
379    /// Reads from the file at 'file_offset' into `buf`.
380    ///
381    /// Less efficient than `read_to_mem` because of extra copies and allocations.
382    async fn read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize> {
383        let backing_mem = Arc::new(cros_async::VecIoWrapper::from(vec![0u8; buf.len()]));
384        let region = cros_async::MemRegion {
385            offset: 0,
386            len: buf.len(),
387        };
388        let n = self
389            .read_to_mem(
390                file_offset,
391                backing_mem.clone(),
392                MemRegionIter::new(&[region]),
393            )
394            .await?;
395        backing_mem
396            .get_volatile_slice(region)
397            .expect("BUG: the VecIoWrapper shrank?")
398            .sub_slice(0, n)
399            .expect("BUG: read_to_mem return value too large?")
400            .copy_to(buf);
401        Ok(n)
402    }
403
404    /// Writes to the file at 'file_offset' from `buf`.
405    ///
406    /// Less efficient than `write_from_mem` because of extra copies and allocations.
407    async fn write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize> {
408        let backing_mem = Arc::new(cros_async::VecIoWrapper::from(buf.to_vec()));
409        let region = cros_async::MemRegion {
410            offset: 0,
411            len: buf.len(),
412        };
413        self.write_from_mem(
414            file_offset,
415            backing_mem,
416            cros_async::MemRegionIter::new(&[region]),
417        )
418        .await
419    }
420}
421
422/// A disk backed by a single file that implements `AsyncDisk` for access.
423pub struct SingleFileDisk {
424    inner: IoSource<File>,
425    // Whether the backed file is a block device since the punch-hole needs different operation.
426    #[cfg(any(target_os = "android", target_os = "linux"))]
427    is_block_device_file: bool,
428}
429
430impl DiskGetLen for SingleFileDisk {
431    fn get_len(&self) -> io::Result<u64> {
432        self.inner.as_source().get_len()
433    }
434}
435
436impl FileSetLen for SingleFileDisk {
437    fn set_len(&self, len: u64) -> io::Result<()> {
438        self.inner.as_source().set_len(len)
439    }
440}
441
442impl FileAllocate for SingleFileDisk {
443    fn allocate(&self, offset: u64, len: u64) -> io::Result<()> {
444        self.inner.as_source().allocate(offset, len)
445    }
446}
447
448#[async_trait(?Send)]
449impl AsyncDisk for SingleFileDisk {
450    async fn flush(&self) -> Result<()> {
451        // Nothing to flush, all file mutations are immediately sent to the OS.
452        Ok(())
453    }
454
455    async fn fsync(&self) -> Result<()> {
456        self.inner.fsync().await.map_err(Error::Fsync)
457    }
458
459    async fn fdatasync(&self) -> Result<()> {
460        self.inner.fdatasync().await.map_err(Error::Fdatasync)
461    }
462
463    async fn read_to_mem<'a>(
464        &'a self,
465        file_offset: u64,
466        mem: Arc<dyn BackingMemory + Send + Sync>,
467        mem_offsets: cros_async::MemRegionIter<'a>,
468    ) -> Result<usize> {
469        self.inner
470            .read_to_mem(Some(file_offset), mem, mem_offsets)
471            .await
472            .map_err(Error::ReadToMem)
473    }
474
475    async fn write_from_mem<'a>(
476        &'a self,
477        file_offset: u64,
478        mem: Arc<dyn BackingMemory + Send + Sync>,
479        mem_offsets: cros_async::MemRegionIter<'a>,
480    ) -> Result<usize> {
481        self.inner
482            .write_from_mem(Some(file_offset), mem, mem_offsets)
483            .await
484            .map_err(Error::WriteFromMem)
485    }
486
487    async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()> {
488        #[cfg(any(target_os = "android", target_os = "linux"))]
489        if self.is_block_device_file {
490            return base::linux::discard_block(self.inner.as_source(), file_offset, length)
491                .map_err(Error::PunchHoleBlockDeviceFile);
492        }
493        self.inner
494            .punch_hole(file_offset, length)
495            .await
496            .map_err(Error::PunchHole)
497    }
498
499    async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()> {
500        if self
501            .inner
502            .write_zeroes_at(file_offset, length)
503            .await
504            .is_ok()
505        {
506            return Ok(());
507        }
508
509        // Fall back to filling zeros if more efficient write_zeroes_at doesn't work.
510        let buf_size = min(length, 0x10000);
511        let mut nwritten = 0;
512        while nwritten < length {
513            let remaining = length - nwritten;
514            let write_size = min(remaining, buf_size) as usize;
515            let buf = vec![0u8; write_size];
516            nwritten += self
517                .inner
518                .write_from_vec(Some(file_offset + nwritten), buf)
519                .await
520                .map(|(n, _)| n as u64)
521                .map_err(Error::WriteFromVec)?;
522        }
523        Ok(())
524    }
525}