base/mmap.rs

// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::cmp::min;
use std::fs::File;
use std::io;
use std::mem::size_of;
use std::ptr::copy_nonoverlapping;
use std::ptr::read_unaligned;
use std::ptr::read_volatile;
use std::ptr::write_unaligned;
use std::ptr::write_volatile;
use std::sync::atomic::fence;
use std::sync::atomic::Ordering;
use std::sync::OnceLock;

use remain::sorted;
use serde::Deserialize;
use serde::Serialize;
use zerocopy::FromBytes;
use zerocopy::Immutable;
use zerocopy::IntoBytes;

use crate::descriptor::AsRawDescriptor;
use crate::descriptor::SafeDescriptor;
use crate::platform::MemoryMapping as PlatformMmap;
use crate::SharedMemory;
use crate::VolatileMemory;
use crate::VolatileMemoryError;
use crate::VolatileMemoryResult;
use crate::VolatileSlice;

static CACHELINE_SIZE: OnceLock<usize> = OnceLock::new();

#[allow(unused_assignments)]
fn get_cacheline_size_once() -> usize {
    let mut assume_reason: &str = "unknown";
    cfg_if::cfg_if! {
        if #[cfg(all(any(target_os = "android", target_os = "linux"), not(target_env = "musl")))] {
            // TODO: Remove once available in libc bindings
            #[cfg(target_os = "android")]
            const _SC_LEVEL1_DCACHE_LINESIZE: i32 = 0x0094;
            #[cfg(target_os = "linux")]
            use libc::_SC_LEVEL1_DCACHE_LINESIZE;

            // SAFETY:
            // Safe because we check the return value for errors or unsupported requests
            let linesize = unsafe { libc::sysconf(_SC_LEVEL1_DCACHE_LINESIZE) };
            if linesize > 0 {
                return linesize as usize;
            } else {
                assume_reason = "sysconf cacheline size query failed";
            }
        } else {
            assume_reason = "cacheline size query not implemented for platform/arch";
        }
    }

    let assumed_size = 64;
    log::debug!(
        "assuming cacheline_size={}; reason: {}.",
        assumed_size,
        assume_reason
    );
    assumed_size
}

/// Returns the system's effective cacheline size (e.g. the granularity at which arch-specific
/// cacheline management, such as with the clflush instruction, is expected to occur).
#[inline(always)]
fn get_cacheline_size() -> usize {
    let size = *CACHELINE_SIZE.get_or_init(get_cacheline_size_once);
    assert!(size > 0);
    size
}

#[sorted]
#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error("`add_fd_mapping` is unsupported")]
    AddFdMappingIsUnsupported,
    #[error("requested memory out of range")]
    InvalidAddress,
    #[error("requested alignment is incompatible")]
    InvalidAlignment,
    #[error("invalid argument provided when creating mapping")]
    InvalidArgument,
    #[error("requested offset is out of range of off_t")]
    InvalidOffset,
    #[error("requested memory range spans past the end of the region: offset={0} count={1} region_size={2}")]
    InvalidRange(usize, usize, usize),
    #[error("operation is not implemented on platform/architecture: {0}")]
    NotImplemented(&'static str),
    #[error("requested memory is not page aligned")]
    NotPageAligned,
    #[error("failed to read from file to memory: {0}")]
    ReadToMemory(#[source] io::Error),
    #[error("`remove_mapping` is unsupported")]
    RemoveMappingIsUnsupported,
    #[error("system call failed while creating the mapping: {0}")]
    StdSyscallFailed(io::Error),
    #[error("mmap related system call failed: {0}")]
    SystemCallFailed(#[source] crate::Error),
    #[error("failed to write from memory to file: {0}")]
    WriteFromMemory(#[source] io::Error),
}
pub type Result<T> = std::result::Result<T, Error>;

/// Memory access type for anonymous shared memory mapping.
#[derive(Copy, Clone, Default, Eq, PartialEq, Serialize, Deserialize, Debug)]
pub struct Protection {
    pub(crate) read: bool,
    pub(crate) write: bool,
}

impl Protection {
    /// Returns Protection allowing read/write access.
    #[inline(always)]
    pub fn read_write() -> Protection {
        Protection {
            read: true,
            write: true,
        }
    }

    /// Returns Protection allowing read access.
    #[inline(always)]
    pub fn read() -> Protection {
        Protection {
            read: true,
            ..Default::default()
        }
    }

    /// Returns Protection allowing write access.
    #[inline(always)]
    pub fn write() -> Protection {
        Protection {
            write: true,
            ..Default::default()
        }
    }

    /// Returns a copy of `self` with read access enabled.
    #[inline(always)]
    pub fn set_read(self) -> Protection {
        Protection { read: true, ..self }
    }

    /// Returns a copy of `self` with write access enabled.
    #[inline(always)]
    pub fn set_write(self) -> Protection {
        Protection {
            write: true,
            ..self
        }
    }

    /// Returns true if all access allowed by |other| is also allowed by |self|.
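    ///
    /// A minimal illustration (a sketch assuming `Protection` is re-exported at the crate root,
    /// like the other types used in this file's doc examples):
    ///
    /// ```
    /// # use base::Protection;
    /// assert!(Protection::read_write().allows(&Protection::read()));
    /// assert!(!Protection::read().allows(&Protection::write()));
    /// ```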
    #[inline(always)]
    pub fn allows(&self, other: &Protection) -> bool {
        self.read >= other.read && self.write >= other.write
    }
}

/// See [MemoryMapping](crate::platform::MemoryMapping) for struct- and method-level
/// documentation.
#[derive(Debug)]
pub struct MemoryMapping {
    pub(crate) mapping: PlatformMmap,

    // File backed mappings on Windows need to keep the underlying file open while the mapping is
    // open.
    // This will be `None` on non-Windows platforms. The field is never read, hence the leading
    // underscore in its name.
    //
    // TODO(b:230902713) There was a concern about relying on the kernel's refcounting to keep the
    // file object's locks (e.g. exclusive read/write) in place. We need to revisit/validate that
    // concern.
    pub(crate) _file_descriptor: Option<SafeDescriptor>,
}

#[inline(always)]
unsafe fn flush_one(_addr: *const u8) -> Result<()> {
    cfg_if::cfg_if! {
        if #[cfg(target_arch = "x86_64")] {
            // As per table 11-7 of the SDM, processors are not required to
            // snoop UC mappings, so flush the target to memory.
            // SAFETY: assumes that the caller has supplied a valid address.
            unsafe { core::arch::x86_64::_mm_clflush(_addr) };
            Ok(())
        } else if #[cfg(target_arch = "aarch64")] {
            // Data cache clean by VA to PoC.
            std::arch::asm!("DC CVAC, {x}", x = in(reg) _addr);
            Ok(())
        } else {
            Err(Error::NotImplemented("Cache flush not implemented"))
        }
    }
}

impl MemoryMapping {
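    /// Writes the contents of `buf` into the mapping starting at `offset`, returning the number
    /// of bytes copied. Fewer bytes than `buf.len()` are copied if the slice would extend past
    /// the end of the mapping; an `offset` beyond the mapping size is an error.
    ///
    /// A short usage sketch (assuming an anonymous mapping built with `MemoryMappingBuilder`, as
    /// in the other examples in this file):
    ///
    /// ```
    /// #   use base::MemoryMappingBuilder;
    /// #   let mem_map = MemoryMappingBuilder::new(1024).build().unwrap();
    ///     let written = mem_map.write_slice(&[1u8, 2, 3, 4], 16).unwrap();
    ///     assert_eq!(written, 4);
    /// ```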
    pub fn write_slice(&self, buf: &[u8], offset: usize) -> Result<usize> {
        match self.mapping.size().checked_sub(offset) {
            Some(size_past_offset) => {
                let bytes_copied = min(size_past_offset, buf.len());
                // SAFETY:
                // The bytes_copied equation above ensures we don't copy bytes out of range of
                // either buf or this slice. We also know that the buffers do not overlap because
                // slices can never occupy the same memory as a volatile slice.
                unsafe {
                    copy_nonoverlapping(buf.as_ptr(), self.as_ptr().add(offset), bytes_copied);
                }
                Ok(bytes_copied)
            }
            None => Err(Error::InvalidAddress),
        }
    }

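    /// Reads up to `buf.len()` bytes from the mapping starting at `offset` into `buf`, returning
    /// the number of bytes copied. As with `write_slice`, the count is truncated at the end of
    /// the mapping, and an `offset` beyond the mapping size is an error.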
    pub fn read_slice(&self, buf: &mut [u8], offset: usize) -> Result<usize> {
        match self.size().checked_sub(offset) {
            Some(size_past_offset) => {
                let bytes_copied = min(size_past_offset, buf.len());
                // SAFETY:
                // The bytes_copied equation above ensures we don't copy bytes out of range of
                // either buf or this slice. We also know that the buffers do not overlap because
                // slices can never occupy the same memory as a volatile slice.
                unsafe {
                    copy_nonoverlapping(self.as_ptr().add(offset), buf.as_mut_ptr(), bytes_copied);
                }
                Ok(bytes_copied)
            }
            None => Err(Error::InvalidAddress),
        }
    }

    /// Writes an object to the memory region at the specified offset.
    /// Returns Ok(()) if the object fits, or Err if it extends past the end.
    ///
    /// This method is for writing to regular memory. If writing to a mapped
    /// I/O region, use [`MemoryMapping::write_obj_volatile`].
    ///
    /// # Examples
    /// * Write a u64 at offset 16.
    ///
    /// ```
    /// #   use base::MemoryMappingBuilder;
    /// #   use base::SharedMemory;
    /// #   let shm = SharedMemory::new("test", 1024).unwrap();
    /// #   let mut mem_map = MemoryMappingBuilder::new(1024).from_shared_memory(&shm).build().unwrap();
    ///     let res = mem_map.write_obj(55u64, 16);
    ///     assert!(res.is_ok());
    /// ```
    pub fn write_obj<T: IntoBytes + Immutable>(&self, val: T, offset: usize) -> Result<()> {
        self.mapping.range_end(offset, size_of::<T>())?;
        // SAFETY:
        // This is safe because we checked the bounds above.
        unsafe {
            write_unaligned(self.as_ptr().add(offset) as *mut T, val);
        }
        Ok(())
    }

    /// Reads an object from the memory region at the given offset.
    /// Reading from a volatile area isn't strictly safe as it could change
    /// mid-read. However, as long as the type T is plain old data and can
    /// handle random initialization, everything will be OK.
    ///
    /// This method is for reading from regular memory. If reading from a
    /// mapped I/O region, use [`MemoryMapping::read_obj_volatile`].
    ///
    /// # Examples
    /// * Read a u64 written to offset 32.
    ///
    /// ```
    /// #   use base::MemoryMappingBuilder;
    /// #   let mut mem_map = MemoryMappingBuilder::new(1024).build().unwrap();
    ///     let res = mem_map.write_obj(55u64, 32);
    ///     assert!(res.is_ok());
    ///     let num: u64 = mem_map.read_obj(32).unwrap();
    ///     assert_eq!(55, num);
    /// ```
    pub fn read_obj<T: FromBytes>(&self, offset: usize) -> Result<T> {
        self.mapping.range_end(offset, size_of::<T>())?;
        // SAFETY:
        // This is safe because `T: FromBytes` guarantees that any bit pattern is a valid `T`, and
        // the bounds were checked above.
        unsafe {
            Ok(read_unaligned(
                self.as_ptr().add(offset) as *const u8 as *const T
            ))
        }
    }

    /// Writes an object to the memory region at the specified offset.
    /// Returns Ok(()) if the object fits, or Err if it extends past the end.
    ///
    /// The write operation will be volatile, i.e. it will not be reordered by
    /// the compiler and is suitable for I/O, but must be aligned. When writing
    /// to regular memory, prefer [`MemoryMapping::write_obj`].
    ///
    /// # Examples
    /// * Write a u32 at offset 16.
    ///
    /// ```
    /// #   use base::MemoryMappingBuilder;
    /// #   use base::SharedMemory;
    /// #   let shm = SharedMemory::new("test", 1024).unwrap();
    /// #   let mut mem_map = MemoryMappingBuilder::new(1024).from_shared_memory(&shm).build().unwrap();
    ///     let res = mem_map.write_obj_volatile(0xf00u32, 16);
    ///     assert!(res.is_ok());
    /// ```
    pub fn write_obj_volatile<T: IntoBytes + Immutable>(
        &self,
        val: T,
        offset: usize,
    ) -> Result<()> {
        self.mapping.range_end(offset, size_of::<T>())?;
        // Make sure writes to memory have been committed before performing I/O that could
        // potentially depend on them.
        fence(Ordering::SeqCst);
        // SAFETY:
        // This is safe because we checked the bounds above.
        unsafe {
            write_volatile(self.as_ptr().add(offset) as *mut T, val);
        }
        Ok(())
    }

    /// Reads an object from the memory region at the given offset.
    /// Reading from a volatile area isn't strictly safe as it could change
    /// mid-read. However, as long as the type T is plain old data and can
    /// handle random initialization, everything will be OK.
    ///
    /// The read operation will be volatile, i.e. it will not be reordered by
    /// the compiler and is suitable for I/O, but must be aligned. When reading
    /// from regular memory, prefer [`MemoryMapping::read_obj`].
    ///
    /// # Examples
    /// * Read a u32 written to offset 16.
    ///
    /// ```
    /// #   use base::MemoryMappingBuilder;
    /// #   use base::SharedMemory;
    /// #   let shm = SharedMemory::new("test", 1024).unwrap();
    /// #   let mut mem_map = MemoryMappingBuilder::new(1024).from_shared_memory(&shm).build().unwrap();
    ///     let res = mem_map.write_obj(0xf00u32, 16);
    ///     assert!(res.is_ok());
    ///     let num: u32 = mem_map.read_obj_volatile(16).unwrap();
    ///     assert_eq!(0xf00, num);
    /// ```
    pub fn read_obj_volatile<T: FromBytes>(&self, offset: usize) -> Result<T> {
        self.mapping.range_end(offset, size_of::<T>())?;
        // SAFETY:
        // This is safe because `T: FromBytes` guarantees that any bit pattern is a valid `T`, and
        // the bounds were checked above.
        unsafe {
            Ok(read_volatile(
                self.as_ptr().add(offset) as *const u8 as *const T
            ))
        }
    }

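    /// Flushes modified pages in the mapping back to its backing store, if any (the platform
    /// implementation typically forwards this to `msync(2)` or an equivalent).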
    pub fn msync(&self) -> Result<()> {
        self.mapping.msync()
    }

    /// Flush a region of the MemoryMapping from the system's caching hierarchy.
    /// There are several uses for flushing:
    ///
    /// * Cached memory which the guest may be reading through an uncached mapping:
    ///
    ///   Guest reads via an uncached mapping can bypass the cache and directly access main
    ///   memory. This is outside the memory model of Rust, which means that even with proper
    ///   synchronization, guest reads via an uncached mapping might not see updates from the
    ///   host. As such, it is necessary to perform architectural cache maintenance to flush the
    ///   host writes to main memory.
    ///
    ///   Note that this does not support writable uncached guest mappings, as doing so
    ///   requires invalidating the cache, not flushing the cache.
    ///
    /// * Uncached memory which the guest may be writing through a cached mapping:
    ///
    ///   Guest writes via a cached mapping of a host's uncached memory may never make it to
    ///   system/device memory prior to being read. In such cases, explicit flushing of the cached
    ///   writes is necessary, since other managers of the host's uncached mapping (e.g. DRM) see
    ///   no need to flush, as they believe all writes would explicitly bypass the caches.
    ///
    /// Currently only supported on x86_64 and aarch64. Cannot be supported on 32-bit arm.
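    ///
    /// A minimal usage sketch (hedged: on architectures other than x86_64/aarch64 this returns
    /// `Err(Error::NotImplemented(_))`, so the example does not assert success):
    ///
    /// ```
    /// #   use base::MemoryMappingBuilder;
    /// #   let mem_map = MemoryMappingBuilder::new(1024).build().unwrap();
    ///     // Flush the first 128 bytes written through this mapping out of the cache hierarchy.
    ///     let _ = mem_map.flush_region(0, 128);
    /// ```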
    pub fn flush_region(&self, offset: usize, len: usize) -> Result<()> {
        let addr: *const u8 = self.as_ptr();
        let size = self.size();

        // disallow overflow/wrapping ranges and subregion extending beyond mapped range
        if usize::MAX - size < addr as usize || offset >= size || size - offset < len {
            return Err(Error::InvalidRange(offset, len, size));
        }

        // SAFETY:
        // Safe because already validated that `next` will be an address in the mapping:
        //     * mapped region is non-wrapping
        //     * subregion is bounded within the mapped region
        let mut next: *const u8 = unsafe { addr.add(offset) };

        let cacheline_size = get_cacheline_size();
        let cacheline_count = len.div_ceil(cacheline_size);

        for _ in 0..cacheline_count {
            // SAFETY:
            // Safe because `next` is guaranteed to be within the mapped region (see earlier
            // validations), and flushing the cache doesn't affect any rust safety properties.
            unsafe { flush_one(next)? };

            // SAFETY:
            // Safe because we never use next if it goes out of the mapped region or overflows its
            // storage type (based on earlier validations and the loop bounds).
            next = unsafe { next.add(cacheline_size) };
        }
        Ok(())
    }

    /// Flush all backing memory for a mapping in an arch-specific manner (see `flush_region()`).
    pub fn flush_all(&self) -> Result<()> {
        self.flush_region(0, self.size())
    }
}

pub struct MemoryMappingBuilder<'a> {
    pub(crate) descriptor: Option<&'a dyn AsRawDescriptor>,
    pub(crate) is_file_descriptor: bool,
    #[cfg_attr(target_os = "macos", allow(unused))]
    pub(crate) size: usize,
    pub(crate) offset: Option<u64>,
    pub(crate) align: Option<u64>,
    pub(crate) protection: Option<Protection>,
    #[cfg_attr(target_os = "macos", allow(unused))]
    #[cfg_attr(windows, allow(unused))]
    pub(crate) populate: bool,
}

/// Builds a MemoryMapping object from the specified arguments.
impl<'a> MemoryMappingBuilder<'a> {
    /// Creates a new builder specifying the size of the memory region in bytes.
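    ///
    /// A short sketch of the typical builder flow (mirroring the existing doc examples in this
    /// file; `build()` itself is defined in platform-specific code not shown here):
    ///
    /// ```
    /// # use base::MemoryMappingBuilder;
    /// # use base::SharedMemory;
    /// let shm = SharedMemory::new("example", 4096).unwrap();
    /// let mem_map = MemoryMappingBuilder::new(4096)
    ///     .from_shared_memory(&shm)
    ///     .build()
    ///     .unwrap();
    /// mem_map.write_obj(0xf00du32, 0).unwrap();
    /// ```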
    pub fn new(size: usize) -> MemoryMappingBuilder<'a> {
        MemoryMappingBuilder {
            descriptor: None,
            size,
            is_file_descriptor: false,
            offset: None,
            align: None,
            protection: None,
            populate: false,
        }
    }

    /// Build the memory mapping backed by the specified `File`.
    ///
    /// Default: Create a new memory mapping.
    ///
    /// Note: this is a forward looking interface to accommodate platforms that
    /// require special handling for file backed mappings.
    #[allow(clippy::wrong_self_convention, unused_mut)]
    pub fn from_file(mut self, file: &'a File) -> MemoryMappingBuilder<'a> {
        // On Windows, files require special handling (next day shipping if possible).
        self.is_file_descriptor = true;

        self.descriptor = Some(file as &dyn AsRawDescriptor);
        self
    }

    /// Build the memory mapping backed by the specified `SharedMemory`.
    ///
    /// Default: Create a new memory mapping.
    pub fn from_shared_memory(mut self, shm: &'a SharedMemory) -> MemoryMappingBuilder<'a> {
        self.descriptor = Some(shm as &dyn AsRawDescriptor);
        self
    }

    /// Offset in bytes from the beginning of the mapping to start the mmap.
    ///
    /// Default: No offset
    pub fn offset(mut self, offset: u64) -> MemoryMappingBuilder<'a> {
        self.offset = Some(offset);
        self
    }

    /// Protection (e.g. readable/writable) of the memory region.
    ///
    /// Default: Read/write
    pub fn protection(mut self, protection: Protection) -> MemoryMappingBuilder<'a> {
        self.protection = Some(protection);
        self
    }

    /// Alignment of the memory region mapping in bytes.
    ///
    /// Default: No alignment
    pub fn align(mut self, alignment: u64) -> MemoryMappingBuilder<'a> {
        self.align = Some(alignment);
        self
    }
}

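// The `VolatileMemory` implementation below performs the same kind of bounds checking as
// `write_slice`/`read_slice` before handing out a `VolatileSlice` over the mapped region. A small
// illustrative sketch (assuming `VolatileMemory` is importable from the crate root, as the `use`
// at the top of this file suggests):
//
//     use base::{MemoryMappingBuilder, VolatileMemory};
//     let mem_map = MemoryMappingBuilder::new(1024).build().unwrap();
//     assert!(mem_map.get_slice(0, 1024).is_ok());
//     assert!(mem_map.get_slice(1, 1024).is_err()); // extends one byte past the end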
impl VolatileMemory for MemoryMapping {
    fn get_slice(&self, offset: usize, count: usize) -> VolatileMemoryResult<VolatileSlice> {
        let mem_end = offset
            .checked_add(count)
            .ok_or(VolatileMemoryError::Overflow {
                base: offset,
                offset: count,
            })?;

        if mem_end > self.size() {
            return Err(VolatileMemoryError::OutOfBounds { addr: mem_end });
        }

        let new_addr =
            (self.as_ptr() as usize)
                .checked_add(offset)
                .ok_or(VolatileMemoryError::Overflow {
                    base: self.as_ptr() as usize,
                    offset,
                })?;

        // SAFETY:
        // Safe because we checked that offset + count was within our range and we only ever hand
        // out volatile accessors.
        Ok(unsafe { VolatileSlice::from_raw_parts(new_addr as *mut u8, count) })
    }
}

/// A range of memory that can be msynced, for abstracting over different types of memory mappings.
///
/// # Safety
/// Safe when implementers guarantee `ptr`..`ptr+size` is an mmaped region owned by this object that
/// can't be unmapped during the `MappedRegion`'s lifetime.
pub unsafe trait MappedRegion: Send + Sync {
    // SAFETY:
    /// Returns a pointer to the beginning of the memory region. Should only be
    /// used for passing this region to ioctls for setting guest memory.
    fn as_ptr(&self) -> *mut u8;

    /// Returns the size of the memory region in bytes.
    fn size(&self) -> usize;

    /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
    /// at `offset` bytes from the start of the region with `prot` protections.
    /// `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `fd` - File descriptor to mmap from.
    /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    fn add_fd_mapping(
        &mut self,
        _offset: usize,
        _size: usize,
        _fd: &dyn AsRawDescriptor,
        _fd_offset: u64,
        _prot: Protection,
    ) -> Result<()> {
        Err(Error::AddFdMappingIsUnsupported)
    }

    /// Remove `size`-byte mapping starting at `offset`.
    fn remove_mapping(&mut self, _offset: usize, _size: usize) -> Result<()> {
        Err(Error::RemoveMappingIsUnsupported)
    }
}
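
// `add_fd_mapping` and `remove_mapping` above have default bodies that report the operation as
// unsupported, so implementers like `MemoryMapping` below only override them when the platform
// actually supports dynamic (un)mapping. A small sketch of what callers can expect from the
// defaults (assuming `MappedRegion` is re-exported from the crate root):
//
//     use base::{MappedRegion, MemoryMappingBuilder};
//     let mut mem_map = MemoryMappingBuilder::new(4096).build().unwrap();
//     assert!(mem_map.remove_mapping(0, 4096).is_err());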

// SAFETY:
// Safe because it exclusively forwards calls to a safe implementation.
unsafe impl MappedRegion for MemoryMapping {
    fn as_ptr(&self) -> *mut u8 {
        self.mapping.as_ptr()
    }

    fn size(&self) -> usize {
        self.mapping.size()
    }
}

#[derive(Debug, PartialEq, Eq)]
pub struct ExternalMapping {
    pub ptr: u64,
    pub size: usize,
}

// SAFETY:
// `ptr`..`ptr+size` is an mmaped region and is owned by this object. Caller
// needs to ensure that the region is not unmapped during the `MappedRegion`'s
// lifetime.
unsafe impl MappedRegion for ExternalMapping {
    /// Returns a pointer to the beginning of the memory region. Should only be
    /// used for passing this region to ioctls for setting guest memory.
    fn as_ptr(&self) -> *mut u8 {
        self.ptr as *mut u8
    }

    /// Returns the size of the memory region in bytes.
    fn size(&self) -> usize {
        self.size
    }
}