// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use base::linux::FileDataIterator;
use base::linux::MemfdSeals;
use base::linux::MemoryMappingUnix;
use base::linux::SharedMemoryLinux;
use base::MappedRegion;
use base::SharedMemory;
use bitflags::bitflags;

use crate::Error;
use crate::FileBackedMappingParameters;
use crate::GuestAddress;
use crate::GuestMemory;
use crate::MemoryRegion;
use crate::Result;

bitflags! {
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct MemoryPolicy: u32 {
        const USE_HUGEPAGES = 1;
        const LOCK_GUEST_MEMORY = (1 << 1);
        const USE_DONTNEED_LOCKED = (1 << 2);
    }
}
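
// A minimal sketch of the bitflags semantics relied on below: `|` unions
// flags and `contains` tests membership (illustrative only):
//
//     let policy = MemoryPolicy::USE_HUGEPAGES | MemoryPolicy::LOCK_GUEST_MEMORY;
//     assert!(policy.contains(MemoryPolicy::USE_HUGEPAGES));
//     assert!(!policy.contains(MemoryPolicy::USE_DONTNEED_LOCKED));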

pub(crate) fn finalize_shm(shm: &mut SharedMemory) -> Result<()> {
    // Seals are only a concept on Unix systems, so we must add them in conditional
    // compilation. On Windows, SharedMemory allocation cannot be updated after creation
    // regardless, so the same operation is done implicitly.
    let mut seals = MemfdSeals::new();

    seals.set_shrink_seal();
    seals.set_grow_seal();
    seals.set_seal_seal();

    shm.add_seals(seals).map_err(Error::MemoryAddSealsFailed)
}

impl GuestMemory {
    /// Madvise away the address range in the host that is associated with the given guest range.
    ///
    /// This feature is only available on Unix, where a MemoryMapping can remove a mapped range.
    pub fn remove_range(&self, addr: GuestAddress, count: u64) -> Result<()> {
        let (mapping, offset, _) = self.find_region(addr)?;
        mapping
            .remove_range(offset, count as usize)
            .map_err(|e| Error::MemoryAccess(addr, e))
    }

    /// Madvise away the address range in the host that is associated with the given guest range,
    /// even if the range is locked.
    ///
    /// This feature is only available on Unix, where a MemoryMapping can remove a mapped range.
    ///
    /// Uses `MADV_DONTNEED_LOCKED`, which requires a 5.18+ kernel.
    pub fn dontneed_locked_range(&self, addr: GuestAddress, count: u64) -> Result<()> {
        let (mapping, offset, _) = self.find_region(addr)?;
        mapping
            .dontneed_locked_range(offset, count as usize)
            .map_err(|e| Error::MemoryAccess(addr, e))
    }

    /// Handles guest memory policy hints/advice.
    pub fn set_memory_policy(&mut self, mem_policy: MemoryPolicy) {
        if mem_policy.is_empty() {
            return;
        }

        for region in self.regions.iter() {
            if mem_policy.contains(MemoryPolicy::USE_HUGEPAGES) {
                let ret = region.mapping.use_hugepages();

                if let Err(err) = ret {
                    println!("Failed to enable HUGEPAGE for mapping {}", err);
                }
            }

            if mem_policy.contains(MemoryPolicy::LOCK_GUEST_MEMORY) {
                self.locked = true;

                // This is done in coordination with remove_range() calls, which are
                // performed by the virtio-balloon process (they must be performed by
                // a different process from the one that issues the locks).
                // We also prevent this from happening in single-process
                // configurations when we compute configuration flags.
                let ret = region.mapping.lock_all();

                if let Err(err) = ret {
                    println!("Failed to lock memory for mapping {}", err);
                }
            }

            if mem_policy.contains(MemoryPolicy::USE_DONTNEED_LOCKED) {
                self.use_dontneed_locked = true;
            }
        }
    }
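
    // A hedged sketch of applying a policy; `guest_mem` is a hypothetical
    // mutable `GuestMemory` created during VM setup:
    //
    //     let policy = MemoryPolicy::USE_HUGEPAGES | MemoryPolicy::LOCK_GUEST_MEMORY;
    //     guest_mem.set_memory_policy(policy);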

    pub fn use_dontfork(&self) -> anyhow::Result<()> {
        for region in self.regions.iter() {
            region.mapping.use_dontfork()?;
        }
        Ok(())
    }
}

impl FileBackedMappingParameters {
    pub fn open(&self) -> std::io::Result<std::fs::File> {
        use std::os::unix::fs::OpenOptionsExt;
        Ok(base::open_file_or_duplicate(
            &self.path,
            std::fs::OpenOptions::new()
                .read(true)
                .write(self.writable)
                .custom_flags(if self.sync { libc::O_SYNC } else { 0 }),
        )?)
    }
}
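
// A hedged sketch of opening a file-backed mapping. Only the fields used
// above (`path`, `writable`, `sync`) are shown; the struct's remaining fields
// are elided and would need to be filled in per its definition:
//
//     let params = FileBackedMappingParameters {
//         path: "/path/to/backing.img".into(),
//         writable: true,
//         sync: false,
//         // ...remaining fields per the struct definition
//     };
//     let file = params.open()?;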

impl MemoryRegion {
    /// Finds ranges of memory that might have non-zero data (i.e. not unallocated memory). The
    /// ranges are offsets into the region's mmap, not offsets into the backing file.
    ///
    /// For example, if there were three bytes and the second byte was a hole, the return would be
    /// `[0..1, 2..3]` (in practice these are probably always at least page sized).
    pub(crate) fn find_data_ranges(&self) -> anyhow::Result<Vec<std::ops::Range<usize>>> {
        FileDataIterator::new(
            &self.shared_obj,
            self.obj_offset,
            u64::try_from(self.mapping.size()).unwrap(),
        )
        .map(|range| {
            let range = range?;
            // Convert from file offsets to mmap offsets.
            Ok(usize::try_from(range.start - self.obj_offset).unwrap()
                ..usize::try_from(range.end - self.obj_offset).unwrap())
        })
        .collect()
    }

    pub(crate) fn zero_range(&self, offset: usize, size: usize) -> anyhow::Result<()> {
        self.mapping.remove_range(offset, size)?;
        Ok(())
    }
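
    // A hedged sketch pairing the two methods above when snapshotting a
    // region: copy out only the populated ranges, then punch them out of the
    // mapping. `copy_out` is a hypothetical helper, not part of this crate:
    //
    //     for range in region.find_data_ranges()? {
    //         copy_out(&region, range.clone())?;
    //         region.zero_range(range.start, range.end - range.start)?;
    //     }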
}