base/sys/linux/mmap.rs

// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! The mmap module provides a safe interface to mmap memory and ensures unmap is called when the
//! mmap object leaves scope.

use std::ptr::null_mut;

use libc::c_int;
use libc::PROT_READ;
use libc::PROT_WRITE;
use log::warn;

use super::Error as ErrnoError;
use crate::pagesize;
use crate::AsRawDescriptor;
use crate::Descriptor;
use crate::MappedRegion;
use crate::MemoryMapping as CrateMemoryMapping;
use crate::MemoryMappingBuilder;
use crate::MmapError as Error;
use crate::MmapResult as Result;
use crate::Protection;
use crate::RawDescriptor;
use crate::SafeDescriptor;

impl From<Protection> for c_int {
    #[inline(always)]
    fn from(p: Protection) -> Self {
        let mut value = 0;
        if p.read {
            value |= PROT_READ;
        }
        if p.write {
            value |= PROT_WRITE;
        }
        value
    }
}

/// Validates that `offset`..`offset+range_size` lies within the bounds of a memory mapping of
/// `mmap_size` bytes.  Also checks for any overflow.
fn validate_includes_range(mmap_size: usize, offset: usize, range_size: usize) -> Result<()> {
    // Ensure offset + size doesn't overflow
    let end_offset = offset
        .checked_add(range_size)
        .ok_or(Error::InvalidAddress)?;
    // Ensure offset + size are within the mapping bounds
    if end_offset <= mmap_size {
        Ok(())
    } else {
        Err(Error::InvalidAddress)
    }
}

impl dyn MappedRegion {
    /// Calls msync with MS_SYNC on a mapping of `size` bytes starting at `offset` from the start of
    /// the region.  `offset`..`offset+size` must be contained within the `MappedRegion`.
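    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); it assumes a `MemoryMappingArena` and
    /// `pagesize()` from this crate are in scope.
    ///
    /// ```ignore
    /// let arena = MemoryMappingArena::new(pagesize() * 4)?;
    /// // Synchronously flush the first page of the region.
    /// <dyn MappedRegion>::msync(&arena, 0, pagesize())?;
    /// ```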
    pub fn msync(&self, offset: usize, size: usize) -> Result<()> {
        validate_includes_range(self.size(), offset, size)?;

        // SAFETY:
        // Safe because the MemoryMapping/MemoryMappingArena interface ensures our pointer and size
        // are correct, and we've validated that `offset`..`offset+size` is in the range owned by
        // this `MappedRegion`.
        let ret = unsafe {
            libc::msync(
                (self.as_ptr() as usize + offset) as *mut libc::c_void,
                size,
                libc::MS_SYNC,
            )
        };
        if ret != -1 {
            Ok(())
        } else {
            Err(Error::SystemCallFailed(ErrnoError::last()))
        }
    }

    /// Calls madvise on a mapping of `size` bytes starting at `offset` from the start of
    /// the region.  `offset`..`offset+size` must be contained within the `MappedRegion`.
    pub fn madvise(&self, offset: usize, size: usize, advice: libc::c_int) -> Result<()> {
        validate_includes_range(self.size(), offset, size)?;

        // SAFETY:
        // Safe because the MemoryMapping/MemoryMappingArena interface ensures our pointer and size
        // are correct, and we've validated that `offset`..`offset+size` is in the range owned by
        // this `MappedRegion`.
        let ret = unsafe {
            libc::madvise(
                (self.as_ptr() as usize + offset) as *mut libc::c_void,
                size,
                advice,
            )
        };
        if ret != -1 {
            Ok(())
        } else {
            Err(Error::SystemCallFailed(ErrnoError::last()))
        }
    }
}

/// Wraps an anonymous shared memory mapping in the current process. Provides
/// RAII semantics including munmap when no longer needed.
#[derive(Debug)]
pub struct MemoryMapping {
    addr: *mut u8,
    size: usize,
}

// SAFETY:
// Send and Sync aren't automatically inherited for the raw address pointer.
// Accessing that pointer is only done through the stateless interface which
// allows the object to be shared by multiple threads without a decrease in
// safety.
unsafe impl Send for MemoryMapping {}
// SAFETY: See safety comments for impl Send
unsafe impl Sync for MemoryMapping {}

impl MemoryMapping {
    /// Creates an anonymous shared, read/write mapping of `size` bytes.
    ///
    /// # Arguments
    /// * `size` - Size of memory region in bytes.
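    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); assumes `pagesize()` from this crate.
    ///
    /// ```ignore
    /// // Map one page of anonymous, shared, read/write memory.
    /// let mapping = MemoryMapping::new(pagesize())?;
    /// assert_eq!(mapping.size(), pagesize());
    /// ```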
    pub fn new(size: usize) -> Result<MemoryMapping> {
        MemoryMapping::new_protection(size, None, Protection::read_write())
    }

    /// Creates an anonymous shared mapping of `size` bytes with `prot` protection.
    ///
    /// # Arguments
    /// * `size` - Size of memory region in bytes.
    /// * `align` - Optional alignment for MemoryMapping::addr.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn new_protection(
        size: usize,
        align: Option<u64>,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        // SAFETY:
        // This is safe because we are creating an anonymous mapping in a place not already used by
        // any other area in this process.
        unsafe { MemoryMapping::try_mmap(None, size, align, prot.into(), None) }
    }

    /// Maps the first `size` bytes of the given `fd` as read/write.
    ///
    /// # Arguments
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
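    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); assumes `pagesize()` from this crate and the
    /// `tempfile` crate for the backing file.
    ///
    /// ```ignore
    /// let file = tempfile::tempfile()?;
    /// file.set_len(pagesize() as u64)?;
    /// // Map the first page of the file as read/write shared memory.
    /// let mapping = MemoryMapping::from_fd(&file, pagesize())?;
    /// ```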
    pub fn from_fd(fd: &dyn AsRawDescriptor, size: usize) -> Result<MemoryMapping> {
        MemoryMapping::from_fd_offset(fd, size, 0)
    }

    pub fn from_fd_offset(
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
    ) -> Result<MemoryMapping> {
        MemoryMapping::from_fd_offset_protection(fd, size, offset, Protection::read_write())
    }

    /// Maps the `size` bytes starting at `offset` bytes of the given `fd` as read/write.
    ///
    /// # Arguments
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
    /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn from_fd_offset_protection(
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        MemoryMapping::from_fd_offset_protection_populate(fd, size, offset, 0, prot, false)
    }

    /// Maps `size` bytes starting at `offset` from the given `fd` as read/write, and requests
    /// that the pages are pre-populated.
    ///
    /// # Arguments
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
    /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `align` - Alignment for MemoryMapping::addr.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    /// * `populate` - Populate (prefault) page tables for a mapping.
    pub fn from_fd_offset_protection_populate(
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
        align: u64,
        prot: Protection,
        populate: bool,
    ) -> Result<MemoryMapping> {
        // SAFETY:
        // This is safe because we are creating the mapping in a place not already used by any
        // other area in this process.
        unsafe {
            MemoryMapping::try_mmap_populate(
                None,
                size,
                Some(align),
                prot.into(),
                Some((fd, offset)),
                populate,
            )
        }
    }

    /// Creates an anonymous shared mapping of `size` bytes with `prot` protection.
    ///
    /// # Arguments
    ///
    /// * `addr` - Memory address to mmap at.
    /// * `size` - Size of memory region in bytes.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    ///
    /// # Safety
    ///
    /// This function should not be called before the caller unmaps any mmap'd regions already
    /// present at `(addr..addr+size)`.
    pub unsafe fn new_protection_fixed(
        addr: *mut u8,
        size: usize,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        MemoryMapping::try_mmap(Some(addr), size, None, prot.into(), None)
    }

    /// Maps the `size` bytes starting at `offset` bytes of the given `fd` with
    /// `prot` protections.
    ///
    /// # Arguments
    ///
    /// * `addr` - Memory address to mmap at.
    /// * `fd` - File descriptor to mmap from.
    /// * `size` - Size of memory region in bytes.
    /// * `offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    ///
    /// # Safety
    ///
    /// This function should not be called before the caller unmaps any mmap'd regions already
    /// present at `(addr..addr+size)`.
    pub unsafe fn from_descriptor_offset_protection_fixed(
        addr: *mut u8,
        fd: &dyn AsRawDescriptor,
        size: usize,
        offset: u64,
        prot: Protection,
    ) -> Result<MemoryMapping> {
        MemoryMapping::try_mmap(Some(addr), size, None, prot.into(), Some((fd, offset)))
    }

    /// Helper wrapper around `try_mmap_populate` that does not request MAP_POPULATE.
    unsafe fn try_mmap(
        addr: Option<*mut u8>,
        size: usize,
        align: Option<u64>,
        prot: c_int,
        fd: Option<(&dyn AsRawDescriptor, u64)>,
    ) -> Result<MemoryMapping> {
        MemoryMapping::try_mmap_populate(addr, size, align, prot, fd, false)
    }

    /// Helper wrapper around libc::mmap that does some basic validation and calls
    /// madvise with MADV_DONTDUMP on the created mmap.
    unsafe fn try_mmap_populate(
        addr: Option<*mut u8>,
        size: usize,
        align: Option<u64>,
        prot: c_int,
        fd: Option<(&dyn AsRawDescriptor, u64)>,
        populate: bool,
    ) -> Result<MemoryMapping> {
        let mut flags = libc::MAP_SHARED;
        if populate {
            flags |= libc::MAP_POPULATE;
        }
        // If addr is provided, set the MAP_FIXED and MAP_NORESERVE flags and validate that addr is
        // page aligned.
        let addr = match addr {
            Some(addr) => {
                if (addr as usize) % pagesize() != 0 {
                    return Err(Error::NotPageAligned);
                }
                flags |= libc::MAP_FIXED | libc::MAP_NORESERVE;
                addr as *mut libc::c_void
            }
            None => null_mut(),
        };

        // mmap already aligns the returned address to the page size, so an explicit page-size
        // alignment request is a no-op.
        let align = if align.unwrap_or(0) == pagesize() as u64 {
            Some(0)
        } else {
            align
        };

        // If a larger alignment is requested, over-allocate an anonymous placeholder region and
        // pick an aligned address inside it; the unused head and tail are unmapped further below.
        let (addr, orig_addr, orig_size) = match align {
            None | Some(0) => (addr, None, None),
            Some(align) => {
                if !addr.is_null() || !align.is_power_of_two() {
                    return Err(Error::InvalidAlignment);
                }
                let orig_size = size + align as usize;
                let orig_addr = libc::mmap64(
                    null_mut(),
                    orig_size,
                    prot,
                    libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS,
                    -1,
                    0,
                );
                if orig_addr == libc::MAP_FAILED {
                    return Err(Error::SystemCallFailed(ErrnoError::last()));
                }

                flags |= libc::MAP_FIXED;

                let mask = align - 1;
                (
                    (orig_addr.wrapping_add(mask as usize) as u64 & !mask) as *mut libc::c_void,
                    Some(orig_addr),
                    Some(orig_size),
                )
            }
        };

        // If an fd is provided, validate that its offset is within bounds. Otherwise, this is an
        // anonymous mapping, so set the MAP_ANONYMOUS and MAP_NORESERVE flags.
        let (fd, offset) = match fd {
            Some((fd, offset)) => {
                if offset > libc::off64_t::MAX as u64 {
                    return Err(Error::InvalidOffset);
                }
                // Map privately when the fd carries a write seal (i.e. it is read-only). See the
                // upstream discussion about relaxing this restriction:
                // - https://lore.kernel.org/bpf/20231013103208.kdffpyerufr4ygnw@quack3/T/
                // SAFETY:
                // Safe because no third parameter is expected and we check the return result.
                let seals = unsafe { libc::fcntl(fd.as_raw_descriptor(), libc::F_GET_SEALS) };
                if (seals >= 0) && (seals & libc::F_SEAL_WRITE != 0) {
                    flags &= !libc::MAP_SHARED;
                    flags |= libc::MAP_PRIVATE;
                }
                (fd.as_raw_descriptor(), offset as libc::off64_t)
            }
            None => {
                flags |= libc::MAP_ANONYMOUS | libc::MAP_NORESERVE;
                (-1, 0)
            }
        };
        let addr = libc::mmap64(addr, size, prot, flags, fd, offset);
        if addr == libc::MAP_FAILED {
            return Err(Error::SystemCallFailed(ErrnoError::last()));
        }

        // If the alignment over-allocation exists, unmap the unused head and tail regions.
        if let Some(orig_addr) = orig_addr {
            let mut unmap_start = orig_addr as usize;
            let mut unmap_end = addr as usize;
            let mut unmap_size = unmap_end - unmap_start;

            if unmap_size > 0 {
                libc::munmap(orig_addr, unmap_size);
            }

            unmap_start = addr as usize + size;
            unmap_end = orig_addr as usize + orig_size.unwrap();
            unmap_size = unmap_end - unmap_start;

            if unmap_size > 0 {
                libc::munmap(unmap_start as *mut libc::c_void, unmap_size);
            }
        }

        // This is safe because we call madvise with a valid address and size.
        let _ = libc::madvise(addr, size, libc::MADV_DONTDUMP);

        // This is safe because KSM's only userspace visible effects are timing
        // and memory consumption; it doesn't affect rust safety semantics.
        // KSM is also disabled by default, and this flag is only a hint.
        let _ = libc::madvise(addr, size, libc::MADV_MERGEABLE);

        Ok(MemoryMapping {
            addr: addr as *mut u8,
            size,
        })
    }

    /// Madvise the kernel to unmap on fork.
    pub fn use_dontfork(&self) -> Result<()> {
        // SAFETY:
        // This is safe because we call madvise with a valid address and size, and we check the
        // return value.
        let ret = unsafe {
            libc::madvise(
                self.as_ptr() as *mut libc::c_void,
                self.size(),
                libc::MADV_DONTFORK,
            )
        };
        if ret == -1 {
            Err(Error::SystemCallFailed(ErrnoError::last()))
        } else {
            Ok(())
        }
    }

    /// Madvise the kernel to use Huge Pages for this mapping.
    pub fn use_hugepages(&self) -> Result<()> {
        const SZ_2M: usize = 2 * 1024 * 1024;

        // THP uses 2M pages, so use THP only on mappings that are at least
        // 2M in size.
        if self.size() < SZ_2M {
            return Ok(());
        }

        // SAFETY:
        // This is safe because we call madvise with a valid address and size, and we check the
        // return value.
        let ret = unsafe {
            libc::madvise(
                self.as_ptr() as *mut libc::c_void,
                self.size(),
                libc::MADV_HUGEPAGE,
            )
        };
        if ret == -1 {
            Err(Error::SystemCallFailed(ErrnoError::last()))
        } else {
            Ok(())
        }
    }

    /// Calls msync with MS_SYNC on the mapping.
    pub fn msync(&self) -> Result<()> {
        // SAFETY:
        // This is safe since we use the exact address and length of a known
        // good memory mapping.
        let ret = unsafe {
            libc::msync(
                self.as_ptr() as *mut libc::c_void,
                self.size(),
                libc::MS_SYNC,
            )
        };
        if ret == -1 {
            return Err(Error::SystemCallFailed(ErrnoError::last()));
        }
        Ok(())
    }

    /// Uses madvise to tell the kernel to remove the specified range.  Subsequent reads
    /// to the pages in the range will return zero bytes.
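    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); assumes `pagesize()` from this crate.
    ///
    /// ```ignore
    /// let mapping = MemoryMapping::new(pagesize() * 2)?;
    /// // Discard the second page; subsequent reads of it observe zeroes.
    /// mapping.remove_range(pagesize(), pagesize())?;
    /// ```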
    pub fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY: Safe because all the args to madvise are valid and the return
        // value is checked.
        let ret = unsafe {
            // madvising away the region is the same as the guest changing it.
            // Next time it is read, it may return zero pages.
            libc::madvise(
                (self.addr as usize + mem_offset) as *mut _,
                count,
                libc::MADV_REMOVE,
            )
        };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    /// Tell the kernel to readahead the range.
    ///
    /// This does not block the thread on I/O while reading from the backing file, and it does not
    /// guarantee that the pages will actually be present unless they are locked with mlock(2) via
    /// `lock_on_fault_unchecked()`.
    ///
    /// The `mem_offset` and `count` must be validated by the caller.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
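    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); assumes a file-backed `mapping` created with
    /// `MemoryMapping::from_fd` and `pagesize()` from this crate.
    ///
    /// ```ignore
    /// // Hint that the first page will be needed soon; this returns without waiting for I/O.
    /// mapping.async_prefetch(0, pagesize())?;
    /// ```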
    pub fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY:
        // Safe because populating the pages from the backing file does not affect Rust memory
        // safety.
        let ret = unsafe {
            libc::madvise(
                (self.addr as usize + mem_offset) as *mut _,
                count,
                libc::MADV_WILLNEED,
            )
        };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    /// Tell the kernel to drop the page cache.
    ///
    /// This cannot be applied to locked pages.
    ///
    /// The `mem_offset` and `count` must be validated by the caller.
    ///
    /// NOTE: This function has destructive semantics. It throws away data in the page cache
    /// without writing it to the backing file. If the data is important, the caller should ensure
    /// it is written to disk before calling this function or should use MADV_PAGEOUT instead.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
    pub fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY:
        // Safe because dropping the page cache does not affect Rust memory safety.
        let ret = unsafe {
            libc::madvise(
                (self.addr as usize + mem_offset) as *mut _,
                count,
                libc::MADV_DONTNEED,
            )
        };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    /// Lock the resident pages in the range so they are not swapped out.
    ///
    /// The remaining nonresident pages are locked when they are populated.
    ///
    /// The `mem_offset` and `count` must be validated by the caller.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
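    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); assumes `pagesize()` from this crate and that
    /// the process's RLIMIT_MEMLOCK permits locking one page.
    ///
    /// ```ignore
    /// let mapping = MemoryMapping::new(pagesize())?;
    /// // Pin the first page once it is faulted in, then release it again.
    /// mapping.lock_on_fault(0, pagesize())?;
    /// mapping.unlock(0, pagesize())?;
    /// ```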
    pub fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        let addr = self.addr as usize + mem_offset;
        // SAFETY:
        // Safe because MLOCK_ONFAULT only affects the swap behavior of the kernel, so it has no
        // impact on rust semantics.
        let ret = unsafe { libc::mlock2(addr as *mut _, count, libc::MLOCK_ONFAULT) };
        if ret < 0 {
            let errno = super::Error::last();
            warn!(
                "failed to mlock at {:#x} with length {}: {}",
                addr as u64,
                count,
                errno,
            );
            Err(Error::SystemCallFailed(errno))
        } else {
            Ok(())
        }
    }

    /// Unlock the range of pages.
    ///
    /// Unlocking non-locked pages does not fail.
    ///
    /// The `mem_offset` and `count` must be validated by the caller.
    ///
    /// # Arguments
    ///
    /// * `mem_offset` - The offset of the head of the range.
    /// * `count` - The size in bytes of the range.
    pub fn unlock(&self, mem_offset: usize, count: usize) -> Result<()> {
        // Validation
        self.range_end(mem_offset, count)
            .map_err(|_| Error::InvalidRange(mem_offset, count, self.size()))?;
        // SAFETY:
        // Safe because munlock(2) does not affect Rust memory safety.
        let ret = unsafe { libc::munlock((self.addr as usize + mem_offset) as *mut _, count) };
        if ret < 0 {
            Err(Error::SystemCallFailed(super::Error::last()))
        } else {
            Ok(())
        }
    }

    // Check that offset+count is valid and return the sum.
    pub(crate) fn range_end(&self, offset: usize, count: usize) -> Result<usize> {
        let mem_end = offset.checked_add(count).ok_or(Error::InvalidAddress)?;
        if mem_end > self.size() {
            return Err(Error::InvalidAddress);
        }
        Ok(mem_end)
    }
}

// SAFETY:
// Safe because the pointer and size point to a memory range owned by this MemoryMapping that won't
// be unmapped until it's Dropped.
unsafe impl MappedRegion for MemoryMapping {
    fn as_ptr(&self) -> *mut u8 {
        self.addr
    }

    fn size(&self) -> usize {
        self.size
    }
}

impl Drop for MemoryMapping {
    fn drop(&mut self) {
        // SAFETY:
        // This is safe because we mmap the area at addr ourselves, and nobody
        // else is holding a reference to it.
        unsafe {
            libc::munmap(self.addr as *mut libc::c_void, self.size);
        }
    }
}

/// Tracks fixed memory maps within an anonymous, fixed-size memory-mapped arena
/// in the current process.
pub struct MemoryMappingArena {
    addr: *mut u8,
    size: usize,
}

// SAFETY:
// Send and Sync aren't automatically inherited for the raw address pointer.
// Accessing that pointer is only done through the stateless interface which
// allows the object to be shared by multiple threads without a decrease in
// safety.
unsafe impl Send for MemoryMappingArena {}
// SAFETY: See safety comments for impl Send
unsafe impl Sync for MemoryMappingArena {}

impl MemoryMappingArena {
    /// Creates an mmap arena of `size` bytes.
    ///
    /// # Arguments
    /// * `size` - Size of memory region in bytes.
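    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); assumes `pagesize()` from this crate.
    ///
    /// ```ignore
    /// let mut arena = MemoryMappingArena::new(pagesize() * 16)?;
    /// // Carve out a read/write anonymous sub-mapping at the start of the arena,
    /// // then release that range again.
    /// arena.add_anon(0, pagesize() * 4)?;
    /// arena.remove(0, pagesize() * 4)?;
    /// ```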
    pub fn new(size: usize) -> Result<MemoryMappingArena> {
        // Reserve the arena's memory using an anonymous read-only mmap.
        MemoryMapping::new_protection(size, None, Protection::read()).map(From::from)
    }

    /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena
    /// with `prot` protections. `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn add_anon_protection(
        &mut self,
        offset: usize,
        size: usize,
        prot: Protection,
    ) -> Result<()> {
        self.try_add(offset, size, prot, None)
    }

    /// Anonymously maps `size` bytes at `offset` bytes from the start of the arena.
    /// `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    pub fn add_anon(&mut self, offset: usize, size: usize) -> Result<()> {
        self.add_anon_protection(offset, size, Protection::read_write())
    }

    /// Maps `size` bytes from the start of the given `fd` at `offset` bytes from
    /// the start of the arena. `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `fd` - File descriptor to mmap from.
    pub fn add_fd(&mut self, offset: usize, size: usize, fd: &dyn AsRawDescriptor) -> Result<()> {
        self.add_fd_offset(offset, size, fd, 0)
    }

    /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
    /// at `offset` bytes from the start of the arena. `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `fd` - File descriptor to mmap from.
    /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    pub fn add_fd_offset(
        &mut self,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
    ) -> Result<()> {
        self.add_fd_offset_protection(offset, size, fd, fd_offset, Protection::read_write())
    }

    /// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
    /// at `offset` bytes from the start of the arena with `prot` protections.
    /// `offset` must be page aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    /// * `fd` - File descriptor to mmap from.
    /// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
    /// * `prot` - Protection (e.g. readable/writable) of the memory region.
    pub fn add_fd_offset_protection(
        &mut self,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        self.try_add(offset, size, prot, Some((fd, fd_offset)))
    }

    /// Helper method that calls the appropriate MemoryMapping constructor and adds
    /// the resulting map into the arena.
    fn try_add(
        &mut self,
        offset: usize,
        size: usize,
        prot: Protection,
        fd: Option<(&dyn AsRawDescriptor, u64)>,
    ) -> Result<()> {
        // Ensure offset is page-aligned
        if offset % pagesize() != 0 {
            return Err(Error::NotPageAligned);
        }
        validate_includes_range(self.size(), offset, size)?;

        // SAFETY:
        // This is safe since the range has been validated.
        let mmap = unsafe {
            match fd {
                Some((fd, fd_offset)) => MemoryMapping::from_descriptor_offset_protection_fixed(
                    self.addr.add(offset),
                    fd,
                    size,
                    fd_offset,
                    prot,
                )?,
                None => MemoryMapping::new_protection_fixed(self.addr.add(offset), size, prot)?,
            }
        };

        // This mapping will get automatically removed when we drop the whole arena.
        std::mem::forget(mmap);
        Ok(())
    }

    /// Removes `size` bytes at `offset` bytes from the start of the arena. `offset` must be page
    /// aligned.
    ///
    /// # Arguments
    /// * `offset` - Page aligned offset into the arena in bytes.
    /// * `size` - Size of memory region in bytes.
    pub fn remove(&mut self, offset: usize, size: usize) -> Result<()> {
        self.try_add(offset, size, Protection::read(), None)
    }
}

// SAFETY:
// Safe because the pointer and size point to a memory range owned by this MemoryMappingArena that
// won't be unmapped until it's Dropped.
unsafe impl MappedRegion for MemoryMappingArena {
    fn as_ptr(&self) -> *mut u8 {
        self.addr
    }

    fn size(&self) -> usize {
        self.size
    }

    fn add_fd_mapping(
        &mut self,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        self.add_fd_offset_protection(offset, size, fd, fd_offset, prot)
    }

    fn remove_mapping(&mut self, offset: usize, size: usize) -> Result<()> {
        self.remove(offset, size)
    }
}

impl From<MemoryMapping> for MemoryMappingArena {
    fn from(mmap: MemoryMapping) -> Self {
        let addr = mmap.as_ptr();
        let size = mmap.size();

        // Forget the original mapping because the `MemoryMappingArena` will take care of calling
        // `munmap` when it is dropped.
        std::mem::forget(mmap);
        MemoryMappingArena { addr, size }
    }
}

impl From<CrateMemoryMapping> for MemoryMappingArena {
    fn from(mmap: CrateMemoryMapping) -> Self {
        MemoryMappingArena::from(mmap.mapping)
    }
}

impl Drop for MemoryMappingArena {
    fn drop(&mut self) {
        // SAFETY:
        // This is safe because we own this memory range, and nobody else is holding a reference to
        // it.
        unsafe {
            libc::munmap(self.addr as *mut libc::c_void, self.size);
        }
    }
}

impl CrateMemoryMapping {
    pub fn use_dontfork(&self) -> Result<()> {
        self.mapping.use_dontfork()
    }

    pub fn use_hugepages(&self) -> Result<()> {
        self.mapping.use_hugepages()
    }

    pub fn from_raw_ptr(addr: RawDescriptor, size: usize) -> Result<CrateMemoryMapping> {
        MemoryMapping::from_fd_offset(&Descriptor(addr), size, 0).map(|mapping| {
            CrateMemoryMapping {
                mapping,
                _file_descriptor: None,
            }
        })
    }
}

pub trait MemoryMappingUnix {
    /// Remove the specified range from the mapping.
    fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Tell the kernel to readahead the range.
    fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Tell the kernel to drop the page cache.
    fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Lock the resident pages in the range not to be swapped out.
    fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Unlock the range of pages.
    fn unlock(&self, mem_offset: usize, count: usize) -> Result<()>;
    /// Disable host swap for this mapping.
    fn lock_all(&self) -> Result<()>;
}

impl MemoryMappingUnix for CrateMemoryMapping {
    fn remove_range(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.remove_range(mem_offset, count)
    }
    fn async_prefetch(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.async_prefetch(mem_offset, count)
    }
    fn drop_page_cache(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.drop_page_cache(mem_offset, count)
    }
    fn lock_on_fault(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.lock_on_fault(mem_offset, count)
    }
    fn unlock(&self, mem_offset: usize, count: usize) -> Result<()> {
        self.mapping.unlock(mem_offset, count)
    }
    fn lock_all(&self) -> Result<()> {
        self.mapping.lock_on_fault(0, self.mapping.size())
    }
}

pub trait MemoryMappingBuilderUnix<'a> {
    #[allow(clippy::wrong_self_convention)]
    fn from_descriptor(self, descriptor: &'a dyn AsRawDescriptor) -> MemoryMappingBuilder<'a>;
}

impl<'a> MemoryMappingBuilderUnix<'a> for MemoryMappingBuilder<'a> {
    /// Build the memory mapping from the specified descriptor.
    ///
    /// Default: Create a new memory mapping.
    #[allow(clippy::wrong_self_convention)]
    fn from_descriptor(mut self, descriptor: &'a dyn AsRawDescriptor) -> MemoryMappingBuilder<'a> {
        self.descriptor = Some(descriptor);
        self
    }
}

impl<'a> MemoryMappingBuilder<'a> {
    /// Request that the mapped pages are pre-populated
    ///
    /// Default: Do not populate
    pub fn populate(mut self) -> MemoryMappingBuilder<'a> {
        self.populate = true;
        self
    }

    /// Build a MemoryMapping from the provided options.
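    ///
    /// # Example
    ///
    /// Illustrative sketch (not a compiled doctest); assumes `pagesize()` from this crate and the
    /// `tempfile` crate for the backing file.
    ///
    /// ```ignore
    /// // Anonymous read/write mapping.
    /// let anon = MemoryMappingBuilder::new(pagesize()).build()?;
    ///
    /// // File-backed mapping with pre-populated page tables.
    /// let file = tempfile::tempfile()?;
    /// file.set_len(pagesize() as u64)?;
    /// let mapped = MemoryMappingBuilder::new(pagesize())
    ///     .from_descriptor(&file)
    ///     .populate()
    ///     .build()?;
    /// ```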
    pub fn build(self) -> Result<CrateMemoryMapping> {
        match self.descriptor {
            None => {
                if self.populate {
                    // Population not supported for new mmaps
                    return Err(Error::InvalidArgument);
                }
                MemoryMappingBuilder::wrap(
                    MemoryMapping::new_protection(
                        self.size,
                        self.align,
                        self.protection.unwrap_or_else(Protection::read_write),
                    )?,
                    None,
                )
            }
            Some(descriptor) => MemoryMappingBuilder::wrap(
                MemoryMapping::from_fd_offset_protection_populate(
                    descriptor,
                    self.size,
                    self.offset.unwrap_or(0),
                    self.align.unwrap_or(0),
                    self.protection.unwrap_or_else(Protection::read_write),
                    self.populate,
                )?,
                None,
            ),
        }
    }

    pub(crate) fn wrap(
        mapping: MemoryMapping,
        file_descriptor: Option<&'a dyn AsRawDescriptor>,
    ) -> Result<CrateMemoryMapping> {
        let file_descriptor = match file_descriptor {
            Some(descriptor) => Some(
                SafeDescriptor::try_from(descriptor)
                    .map_err(|_| Error::SystemCallFailed(ErrnoError::last()))?,
            ),
            None => None,
        };
        Ok(CrateMemoryMapping {
            mapping,
            _file_descriptor: file_descriptor,
        })
    }
}

#[cfg(test)]
mod tests {
    use tempfile::tempfile;

    use super::*;
    use crate::descriptor::Descriptor;
    use crate::VolatileMemory;
    use crate::VolatileMemoryError;

    #[test]
    fn basic_map() {
        let m = MemoryMappingBuilder::new(1024).build().unwrap();
        assert_eq!(1024, m.size());
    }

    #[test]
    fn map_invalid_size() {
        let res = MemoryMappingBuilder::new(0).build().unwrap_err();
        if let Error::SystemCallFailed(e) = res {
            assert_eq!(e.errno(), libc::EINVAL);
        } else {
            panic!("unexpected error: {res}");
        }
    }

    #[test]
    fn map_invalid_fd() {
        let fd = Descriptor(-1);
        let res = MemoryMapping::from_fd(&fd, 1024).unwrap_err();
        if let Error::SystemCallFailed(e) = res {
            assert_eq!(e.errno(), libc::EBADF);
        } else {
            panic!("unexpected error: {res}");
        }
    }

    #[test]
    fn test_write_past_end() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let res = m.write_slice(&[1, 2, 3, 4, 5, 6], 0);
        assert!(res.is_ok());
        assert_eq!(res.unwrap(), 5);
    }

    #[test]
    fn slice_size() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let s = m.get_slice(2, 3).unwrap();
        assert_eq!(s.size(), 3);
    }

    #[test]
    fn slice_addr() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let s = m.get_slice(2, 3).unwrap();
        // SAFETY: all addresses are known to exist.
        assert_eq!(s.as_ptr(), unsafe { m.as_ptr().offset(2) });
    }

    #[test]
    fn slice_overflow_error() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let res = m.get_slice(usize::MAX, 3).unwrap_err();
        assert_eq!(
            res,
            VolatileMemoryError::Overflow {
                base: usize::MAX,
                offset: 3,
            }
        );
    }

    #[test]
    fn slice_oob_error() {
        let m = MemoryMappingBuilder::new(5).build().unwrap();
        let res = m.get_slice(3, 3).unwrap_err();
        assert_eq!(res, VolatileMemoryError::OutOfBounds { addr: 6 });
    }

    #[test]
    fn from_fd_offset_invalid() {
        let fd = tempfile().unwrap();
        let res =
            MemoryMapping::from_fd_offset(&fd, 4096, (libc::off64_t::MAX as u64) + 1).unwrap_err();
        match res {
            Error::InvalidOffset => {}
            e => panic!("unexpected error: {e}"),
        }
    }

    #[test]
    fn arena_new() {
        let m = MemoryMappingArena::new(0x40000).unwrap();
        assert_eq!(m.size(), 0x40000);
    }

    #[test]
    fn arena_add() {
        let mut m = MemoryMappingArena::new(0x40000).unwrap();
        assert!(m.add_anon(0, pagesize() * 4).is_ok());
    }

    #[test]
    fn arena_remove() {
        let mut m = MemoryMappingArena::new(0x40000).unwrap();
        assert!(m.add_anon(0, pagesize() * 4).is_ok());
        assert!(m.remove(0, pagesize()).is_ok());
        assert!(m.remove(0, pagesize() * 2).is_ok());
    }

    #[test]
    fn arena_add_alignment_error() {
        let mut m = MemoryMappingArena::new(pagesize() * 2).unwrap();
        assert!(m.add_anon(0, 0x100).is_ok());
        let res = m.add_anon(pagesize() + 1, 0x100).unwrap_err();
        match res {
            Error::NotPageAligned => {}
            e => panic!("unexpected error: {e}"),
        }
    }

    #[test]
    fn arena_add_oob_error() {
        let mut m = MemoryMappingArena::new(pagesize()).unwrap();
        let res = m.add_anon(0, pagesize() + 1).unwrap_err();
        match res {
            Error::InvalidAddress => {}
            e => panic!("unexpected error: {e}"),
        }
    }

    #[test]
    fn arena_add_overlapping() {
        let ps = pagesize();
        let mut m =
            MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");
        m.add_anon(ps * 4, ps * 4)
            .expect("failed to add sub-mapping");

        // Overlap in the front.
        m.add_anon(ps * 2, ps * 3)
            .expect("failed to add front overlapping sub-mapping");

        // Overlap in the back.
        m.add_anon(ps * 7, ps * 3)
            .expect("failed to add back overlapping sub-mapping");

        // Overlap the back of the first mapping, all of the middle mapping, and the front of the
        // last mapping.
        m.add_anon(ps * 3, ps * 6)
            .expect("failed to add mapping that overlaps several mappings");
    }

    #[test]
    fn arena_remove_overlapping() {
        let ps = pagesize();
        let mut m =
            MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");
        m.add_anon(ps * 4, ps * 4)
            .expect("failed to add sub-mapping");
        m.add_anon(ps * 2, ps * 2)
            .expect("failed to add front overlapping sub-mapping");
        m.add_anon(ps * 8, ps * 2)
            .expect("failed to add back overlapping sub-mapping");

        // Remove the back of the first mapping and the front of the second.
        m.remove(ps * 3, ps * 2)
            .expect("failed to remove front overlapping mapping");

        // Remove the back of the second mapping and the front of the third.
        m.remove(ps * 7, ps * 2)
            .expect("failed to remove back overlapping mapping");

        // Remove a mapping that completely overlaps the middle mapping.
        m.remove(ps * 5, ps * 2)
            .expect("failed to remove fully overlapping mapping");
    }

    #[test]
    fn arena_remove_unaligned() {
        let ps = pagesize();
        let mut m =
            MemoryMappingArena::new(12 * ps).expect("failed to create `MemoryMappingArena`");

        m.add_anon(0, ps).expect("failed to add mapping");
        m.remove(0, ps - 1)
            .expect("failed to remove unaligned mapping");
    }

    #[test]
    fn arena_msync() {
        let size = 0x40000;
        let m = MemoryMappingArena::new(size).unwrap();
        let ps = pagesize();
        <dyn MappedRegion>::msync(&m, 0, ps).unwrap();
        <dyn MappedRegion>::msync(&m, 0, size).unwrap();
        <dyn MappedRegion>::msync(&m, ps, size - ps).unwrap();
        let res = <dyn MappedRegion>::msync(&m, ps, size).unwrap_err();
        match res {
            Error::InvalidAddress => {}
            e => panic!("unexpected error: {e}"),
        }
    }

    #[test]
    fn arena_madvise() {
        let size = 0x40000;
        let mut m = MemoryMappingArena::new(size).unwrap();
        m.add_anon_protection(0, size, Protection::read_write())
            .expect("failed to add writable protection for madvise MADV_REMOVE");
        let ps = pagesize();
        <dyn MappedRegion>::madvise(&m, 0, ps, libc::MADV_PAGEOUT).unwrap();
        <dyn MappedRegion>::madvise(&m, 0, size, libc::MADV_PAGEOUT).unwrap();
        <dyn MappedRegion>::madvise(&m, ps, size - ps, libc::MADV_REMOVE).unwrap();
        let res = <dyn MappedRegion>::madvise(&m, ps, size, libc::MADV_PAGEOUT).unwrap_err();
        match res {
            Error::InvalidAddress => {}
            e => panic!("unexpected error: {e}"),
        }
    }
}