swap/
file.rs

1// Copyright 2022 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#![deny(missing_docs)]
6
7use std::fs::File;
8use std::ops::Range;
9use std::os::unix::fs::FileExt;
10
11use base::error;
12use base::linux::MemoryMappingUnix;
13use base::MemoryMapping;
14use base::MemoryMappingBuilder;
15use base::MmapError;
16use base::Protection;
17use base::VolatileMemory;
18use base::VolatileMemoryError;
19use base::VolatileSlice;
20use thiserror::Error as ThisError;
21
22use crate::pagesize::bytes_to_pages;
23use crate::pagesize::is_page_aligned;
24use crate::pagesize::pages_to_bytes;
25
/// Result type for swap-file operations, with [Error] as the error variant.
pub type Result<T> = std::result::Result<T, Error>;

// On 4KB page size system, guest memory must be less than 8 TiB which is reasonable assumption.
// Page indices are stored as `index + 1` in the lower 31 bits of the packed states below
// ([FilePageState] / [PageState]), so the maximum representable index is (1 << 31) - 2.
const MAX_PAGE_IDX: usize = (1 << 31) - 2;
30
/// Errors returned by [SwapFile] operations.
#[derive(ThisError, Debug)]
pub enum Error {
    /// I/O on the swap file failed.
    #[error("failed to io: {0}")]
    Io(#[from] std::io::Error),
    /// An mmap-related operation failed. The first field names the operation (e.g. "mlock",
    /// "munlock", "madvise willneed").
    #[error("failed to mmap operation ({0}): {1}")]
    Mmap(&'static str, MmapError),
    /// Accessing the swap file mmap as volatile memory failed.
    #[error("failed to volatile memory operation: {0}")]
    VolatileMemory(#[from] VolatileMemoryError),
    /// A page index is outside the region.
    #[error("index is out of range")]
    OutOfRange,
    /// A data size is invalid (e.g. not page-aligned).
    #[error("data size is invalid")]
    InvalidSize,
    /// A page index does not point at a usable page for the operation.
    #[error("index is invalid")]
    InvalidIndex,
}
46
/// u32 to pack the state of a page on the file.
///
/// * MSB: Whether the page on file is freed. (1: freed, 0: allocated)
/// * lower 31 bits:
///   * The corresponding page index if the file page is allocated.
///   * The file page index + 1 of next freed file page if the file page is freed. Zero means it is
///     the last page in the free list.
#[derive(Debug)]
struct FilePageState(u32);
56
57impl FilePageState {
58    const FREED_BIT_MASK: u32 = 1 << 31;
59
60    fn freed_state(first_freed_page: Option<usize>) -> Self {
61        Self(
62            Self::FREED_BIT_MASK
63                | first_freed_page
64                    .map(|idx_file| idx_file as u32 + 1)
65                    .unwrap_or(0),
66        )
67    }
68
69    fn allocated_state(idx_page: usize) -> Option<Self> {
70        if idx_page <= MAX_PAGE_IDX {
71            Some(Self(idx_page as u32))
72        } else {
73            // idx_page is invalid.
74            None
75        }
76    }
77
78    fn is_freed(&self) -> bool {
79        self.0 & Self::FREED_BIT_MASK != 0
80    }
81
82    /// This is valid only if the page is freed.
83    fn next_file_freed_idx(&self) -> Option<Option<usize>> {
84        if self.is_freed() {
85            let next_idx_file = !Self::FREED_BIT_MASK & self.0;
86            if next_idx_file == 0 {
87                Some(None)
88            } else {
89                Some(Some(next_idx_file as usize - 1))
90            }
91        } else {
92            None
93        }
94    }
95
96    /// This is valid only if the page is allocated.
97    fn idx_page(&self) -> Option<usize> {
98        if self.is_freed() {
99            // The file page is freed.
100            None
101        } else {
102            Some(self.0 as usize)
103        }
104    }
105}
106
/// Tracks the allocation state of every file page of a swap file region.
#[derive(Debug)]
struct FilePageStates {
    /// Freed pages on the swap file are managed in a free list. `first_idx_file_freed` points to
    /// the first page index in the list.
    first_idx_file_freed: Option<usize>,
    /// Per file page state, indexed by file page index.
    states: Vec<FilePageState>,
}
114
impl FilePageStates {
    /// Creates empty states with room for `capacity` file pages pre-allocated.
    fn new(capacity: usize) -> Self {
        FilePageStates {
            first_idx_file_freed: None,
            states: Vec::with_capacity(capacity),
        }
    }

    /// The number of file pages tracked so far (the high watermark; freed pages are included).
    fn len(&self) -> usize {
        self.states.len()
    }

    /// Free a page on swap file.
    ///
    /// The page is pushed onto the head of the free list.
    fn free(&mut self, idx_file: usize) {
        self.states[idx_file] = FilePageState::freed_state(self.first_idx_file_freed);
        self.first_idx_file_freed = Some(idx_file);
    }

    /// Allocates a file page on the swap file.
    ///
    /// This returns the index of the allocated file page.
    ///
    /// This reuses freed file pages first. If the free list is empty, this allocates new pages in
    /// the file.
    fn allocate(&mut self, idx_page: usize) -> usize {
        if let Some(idx_file_freed) = self.first_idx_file_freed {
            // TODO(kawasin): Collect consecutive freed pages in the free list to reduce number of
            // writes.
            let Some(next_idx_file_freed) = self.states[idx_file_freed].next_file_freed_idx()
            else {
                unreachable!("pages in free list must be freed pages")
            };
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx_page must be less than MAX_PAGE_IDX");
            };
            self.states[idx_file_freed] = state;
            // Pop the reused page off the head of the free list.
            self.first_idx_file_freed = next_idx_file_freed;

            idx_file_freed
        } else {
            // The free list is empty. Allocate new pages.
            let head_idx_file = self.states.len();
            let Some(state) = FilePageState::allocated_state(idx_page) else {
                unreachable!("idx must be less than MAX_PAGE_IDX");
            };
            self.states.push(state);
            head_idx_file
        }
    }

    /// Find the index range of file pages that are all present.
    ///
    /// This returns the pair of range of file page indexes and the index of the corresponding first
    /// page.
    ///
    /// Returns `None` if no pages after `idx_file` are present.
    ///
    /// # Arguments
    ///
    /// * `idx_file` - The first index to start searching from.
    /// * `page_states` - The page states
    /// * `max_pages` - The maximum number of pages to search.
    /// * `consecutive` - If true, the pages must have consecutive idx_page values.
    fn find_present_pages_range(
        &self,
        idx_file: usize,
        page_states: &[PageState],
        max_pages: usize,
        consecutive: bool,
    ) -> Option<(Range<usize>, usize)> {
        // Find the first file page at or after `idx_file` which is allocated and whose guest
        // page is present.
        let next_head_idx_offset = self.states[idx_file..].iter().position(|state| {
            !state.is_freed()
                && page_states[state
                    .idx_page()
                    .unwrap_or_else(|| unreachable!("the page is not freed"))]
                .is_present()
        })?;
        let idx_file = idx_file + next_head_idx_offset;

        let Some(head_idx_page) = self.states[idx_file].idx_page() else {
            unreachable!("the file page must not be freed");
        };

        // The head page found above is always part of the range.
        let mut pages = 1;

        // Extend the range while the following file pages are allocated, present, and (when
        // `consecutive` is set) map to consecutive guest page indices.
        if max_pages > 1 {
            for state in self.states[idx_file + 1..].iter() {
                if state.is_freed() {
                    break;
                } else {
                    let Some(idx_page) = state.idx_page() else {
                        unreachable!("allocated page must have idx_page");
                    };
                    if !page_states[idx_page].is_present()
                        || (consecutive && idx_page != head_idx_page + pages)
                    {
                        break;
                    }
                }

                pages += 1;
                if pages >= max_pages {
                    break;
                }
            }
        }

        Some((idx_file..idx_file + pages, head_idx_page))
    }
}
225
/// u32 to pack the state of a guest memory page.
///
/// * If the page is not on the swap file, the value is zero.
/// * MSB: Whether the page is stale or not. (0: stale, 1: present).
/// * lower 31 bits: The corresponding file page index + 1. Never be zero.
#[derive(Clone, Debug)]
struct PageState(u32);

impl PageState {
    const IDX_FILE_MASK: u32 = (1 << 31) - 1;
    const PRESENT_BIT_MASK: u32 = 1 << 31;

    /// Whether the page has no corresponding file page at all.
    fn is_none(&self) -> bool {
        matches!(self.0, 0)
    }

    /// The file page index backing this page, or `None` if the page is not on the file.
    fn idx_file(&self) -> Option<usize> {
        match self.0 {
            0 => None,
            // The index is stored as `idx_file + 1` in the lower 31 bits.
            bits => Some((bits & Self::IDX_FILE_MASK) as usize - 1),
        }
    }

    /// Whether the content on the file is marked present (as opposed to stale).
    fn is_present(&self) -> bool {
        self.0 & Self::PRESENT_BIT_MASK != 0
    }

    /// Points the page at `idx_file` and marks it as present.
    fn update(&mut self, idx_file: usize) {
        self.0 = Self::PRESENT_BIT_MASK | (idx_file as u32 + 1);
    }

    /// Marks the page as present, keeping its file page link.
    fn mark_as_present(&mut self) {
        self.0 |= Self::PRESENT_BIT_MASK;
    }

    /// Marks the page as stale, keeping its file page link for later reuse.
    fn clear(&mut self) {
        self.0 &= !Self::PRESENT_BIT_MASK;
    }

    /// Detaches the page from any file page.
    fn free(&mut self) {
        self.0 = 0;
    }
}
270
/// [SwapFile] stores active pages in a memory region.
///
/// This shares the swap file with other regions and creates mmap corresponding range in the file.
///
/// TODO(kawasin): The file structure is straightforward and is not optimized yet.
/// Each page in the file corresponds to the page in the memory region.
#[derive(Debug)]
pub struct SwapFile<'a> {
    /// The underlying swap file, shared with other regions.
    file: &'a File,
    /// Read-only mmap of this region's range of the file, used for reads and page cache control.
    file_mmap: MemoryMapping,
    /// Per guest page state, indexed by page index within the region.
    page_states: Vec<PageState>,
    /// Allocation states of the file pages (free list + owner page index).
    file_states: FilePageStates,
    // All the data pages before this index are mlock(2)ed.
    cursor_mlock: usize,
    /// Lower bound of file page indices that may contain a present page; used to shorten scans
    /// in `first_data_range()`.
    min_possible_present_idx_file: usize,
}
287
288impl<'a> SwapFile<'a> {
    /// Creates an initialized [SwapFile] for a memory region.
    ///
    /// All the pages are marked as empty at first time.
    ///
    /// Returns [Error::InvalidSize] if `num_of_pages` exceeds the maximum supported page index.
    ///
    /// # Arguments
    ///
    /// * `file` - The swap file.
    /// * `num_of_pages` - The number of pages in the region.
    pub fn new(file: &'a File, num_of_pages: usize) -> Result<Self> {
        if num_of_pages > MAX_PAGE_IDX {
            return Err(Error::InvalidSize);
        }
        // The mmap is read-only; writes go to the file via pwrite(2) in write_to_file() instead.
        let file_mmap = MemoryMappingBuilder::new(pages_to_bytes(num_of_pages))
            .from_file(file)
            .protection(Protection::read())
            .build()
            .map_err(|e| Error::Mmap("create", e))?;
        Ok(Self {
            file,
            file_mmap,
            page_states: vec![PageState(0); num_of_pages],
            file_states: FilePageStates::new(num_of_pages),
            cursor_mlock: 0,
            min_possible_present_idx_file: 0,
        })
    }
315
    /// Returns a content of the page corresponding to the index if it is present.
    ///
    /// Returns [Option::None] if no content in the file.
    ///
    /// Returns [Error::OutOfRange] if the `idx_page` is out of range.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    /// * `allow_cleared` - if true, also return the content of a cleared (non-present) page that
    ///   still has data preserved on the file.
    pub fn page_content(
        &self,
        idx_page: usize,
        allow_cleared: bool,
    ) -> Result<Option<VolatileSlice>> {
        let state = self.page_states.get(idx_page).ok_or(Error::OutOfRange)?;
        if !state.is_none() && (allow_cleared || state.is_present()) {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none");
            };
            return match self
                .file_mmap
                .get_slice(pages_to_bytes(idx_file), pages_to_bytes(1))
            {
                Ok(slice) => Ok(Some(slice)),
                Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
                Err(e) => Err(e.into()),
            };
        }
        Ok(None)
    }
346
    /// Start readahead the swap file into the page cache from the head.
    ///
    /// This also `mlock2(2)` the pages not to be dropped again after populated. This does not block
    /// the caller thread by I/O wait because:
    ///
    /// * `mlock2(2)` is executed with `MLOCK_ONFAULT`.
    /// * `MADV_WILLNEED` is the same as `readahead(2)` which triggers the readahead background.
    ///   * However Linux has a bug that `readahead(2)` (and also `MADV_WILLNEED`) may block due to
    ///     reading the filesystem metadata.
    ///
    /// This returns the number of consecutive pages which are newly mlock(2)ed. Returning `0` means
    /// that there is no more data to be mlock(2)ed in this file.
    ///
    /// The caller must track the number of pages mlock(2)ed not to mlock(2) more pages than
    /// `RLIMIT_MEMLOCK` if it does not have `CAP_IPC_LOCK`.
    ///
    /// # Arguments
    ///
    /// * `max_pages` - The maximum number of pages to be mlock(2)ed at once.
    pub fn lock_and_async_prefetch(&mut self, max_pages: usize) -> Result<usize> {
        if let Some((idx_file_range, _)) = self.file_states.find_present_pages_range(
            self.cursor_mlock,
            &self.page_states,
            max_pages,
            false,
        ) {
            let pages = idx_file_range.end - idx_file_range.start;
            let mem_offset = pages_to_bytes(idx_file_range.start);
            let size_in_bytes = pages_to_bytes(pages);
            // mlock(2) with MLOCK_ONFAULT before the prefetch so pages loaded by the readahead
            // stay resident.
            self.file_mmap
                .lock_on_fault(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("mlock", e))?;
            self.file_mmap
                .async_prefetch(mem_offset, size_in_bytes)
                .map_err(|e| Error::Mmap("madvise willneed", e))?;
            self.cursor_mlock = idx_file_range.end;
            Ok(pages)
        } else {
            // No more present pages; advance the cursor to the end of the file.
            self.cursor_mlock = self.file_states.len();
            Ok(0)
        }
    }
389
    /// Mark the pages in the file corresponding to the index as cleared.
    ///
    /// The contents on the swap file are preserved and will be reused by
    /// `SwapFile::mark_as_present()` and reduce disk I/O.
    ///
    /// If the pages are mlock(2)ed, unlock them before MADV_DONTNEED. This returns the number of
    /// pages munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be cleared. All the pages must be
    ///   present and consecutive in the compacted file.
    pub fn clear_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range.clone())?;

        // Mark the pages as stale; the file page links are kept so the data can be reused.
        for state in &mut self.page_states[idx_page_range] {
            state.clear();
        }

        let offset = pages_to_bytes(idx_file_range.start);
        let munlocked_size = if idx_file_range.start < self.cursor_mlock {
            // idx_file_range is validated at convert_idx_page_range_to_idx_file() and
            // self.cursor_mlock is within the mmap.
            let pages = idx_file_range.end.min(self.cursor_mlock) - idx_file_range.start;
            // munlock(2) first because MADV_DONTNEED fails for mlock(2)ed pages.
            self.file_mmap
                .unlock(offset, pages_to_bytes(pages))
                .map_err(|e| Error::Mmap("munlock", e))?;
            pages
        } else {
            0
        };
        // offset and size are validated at convert_idx_page_range_to_idx_file().
        let size = pages_to_bytes(idx_file_range.end - idx_file_range.start);
        // The page cache is cleared without writing pages back to file even if they are dirty.
        // The disk contents which may not be the latest are kept for later trim optimization.
        self.file_mmap
            .drop_page_cache(offset, size)
            .map_err(|e| Error::Mmap("madvise dontneed", e))?;
        Ok(munlocked_size)
    }
431
    /// Free the pages corresponding to the given range in the file.
    ///
    /// If the pages are mlock(2)ed, unlock them. This returns the number of pages munlock(2)ed.
    ///
    /// # Arguments
    ///
    /// * `idx_page_range` - The indices of consecutive pages to be freed. This may contain
    ///   non-present pages.
    pub fn free_range(&mut self, idx_page_range: Range<usize>) -> Result<usize> {
        if idx_page_range.end > self.page_states.len() {
            return Err(Error::OutOfRange);
        }
        let mut mlocked_pages = 0;
        // Pending contiguous run of mlock(2)ed file pages, munlock(2)ed in one syscall when a
        // non-adjacent page shows up or the loop ends.
        let mut mlock_range: Option<Range<usize>> = None;
        for state in &mut self.page_states[idx_page_range] {
            if !state.is_none() {
                let Some(idx_file) = state.idx_file() else {
                    unreachable!("the page is not none.");
                };
                // Return the file page to the free list.
                self.file_states.free(idx_file);

                if idx_file < self.cursor_mlock && state.is_present() {
                    mlocked_pages += 1;
                    if let Some(range) = mlock_range.as_mut() {
                        if idx_file + 1 == range.start {
                            // Adjacent below: extend the run backwards.
                            range.start = idx_file;
                        } else if idx_file == range.end {
                            // Adjacent above: extend the run forwards.
                            range.end += 1;
                        } else {
                            // Not adjacent: munlock(2) the pending run and start a new one.
                            self.file_mmap
                                .unlock(
                                    pages_to_bytes(range.start),
                                    pages_to_bytes(range.end - range.start),
                                )
                                .map_err(|e| Error::Mmap("munlock", e))?;
                            mlock_range = Some(idx_file..idx_file + 1);
                        }
                    } else {
                        mlock_range = Some(idx_file..idx_file + 1);
                    }
                }
            }
            state.free();
        }
        // munlock(2) the final pending run, if any.
        if let Some(mlock_range) = mlock_range {
            self.file_mmap
                .unlock(
                    pages_to_bytes(mlock_range.start),
                    pages_to_bytes(mlock_range.end - mlock_range.start),
                )
                .map_err(|e| Error::Mmap("munlock", e))?;
        }

        Ok(mlocked_pages)
    }
487
488    /// munlock(2) pages if there are mlock(2)ed pages in the mmap and reset the internal cursor for
489    /// mlock(2) tracking.
490    pub fn clear_mlock(&mut self) -> Result<()> {
491        if self.cursor_mlock > 0 {
492            // cursor_mlock is not `0` only when disabling vmm-swap is aborted by overriding
493            // vmm-swap enable. munlock(2)ing the whole possible pages is not a problem because this
494            // is not a hot path.
495            self.file_mmap
496                .unlock(0, pages_to_bytes(self.cursor_mlock))
497                .map_err(|e| Error::Mmap("munlock", e))?;
498        }
499        self.cursor_mlock = 0;
500        Ok(())
501    }
502
    /// Mark the page as present on the file.
    ///
    /// The content on the swap file on previous `SwapFile::write_to_file()` is reused.
    ///
    /// Returns [Error::InvalidIndex] if the page has no content on the file or is already marked
    /// as present.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the page from the head of the pages.
    pub fn mark_as_present(&mut self, idx_page: usize) -> Result<()> {
        let state = self
            .page_states
            .get_mut(idx_page)
            .ok_or(Error::OutOfRange)?;
        if !state.is_none() && !state.is_present() {
            state.mark_as_present();
            let Some(idx_file) = state.idx_file() else {
                unreachable!("the page is not none.");
            };
            // This file page may contain present data now; widen the scan lower bound.
            self.min_possible_present_idx_file =
                std::cmp::min(idx_file, self.min_possible_present_idx_file);
            Ok(())
        } else {
            Err(Error::InvalidIndex)
        }
    }
527
    /// Writes the contents to the swap file.
    ///
    /// # Arguments
    ///
    /// * `idx_page` - the index of the head page of the content from the head of the pages.
    /// * `mem_slice` - the page content(s). this can be more than 1 page. the size must align with
    ///   the pagesize.
    pub fn write_to_file(&mut self, idx_page: usize, mem_slice: &[u8]) -> Result<()> {
        // validate
        if !is_page_aligned(mem_slice.len()) {
            // mem_slice size must align with page size.
            return Err(Error::InvalidSize);
        }
        let num_pages = bytes_to_pages(mem_slice.len());
        if idx_page + num_pages > self.page_states.len() {
            return Err(Error::OutOfRange);
        }

        // Setting 0 is faster than setting exact index by complex conditions.
        self.min_possible_present_idx_file = 0;

        // Allocate a file page for each page that has none yet; mark all target pages present.
        for cur in idx_page..idx_page + num_pages {
            let state = &mut self.page_states[cur];
            if state.is_none() {
                let idx_file = self.file_states.allocate(cur);
                state.update(idx_file);
            } else {
                state.mark_as_present();
            }
        }

        // Batch runs of consecutive file pages into a single pwrite(2) each.
        let mut pending_idx_file = None;
        let mut pending_pages = 0;
        let mut mem_slice = mem_slice;
        for state in self.page_states[idx_page..idx_page + num_pages].iter() {
            let Some(idx_file) = state.idx_file() else {
                unreachable!("pages must be allocated");
            };
            if let Some(pending_idx_file) = pending_idx_file {
                if idx_file == pending_idx_file + pending_pages {
                    // Consecutive on the file; extend the pending batch.
                    pending_pages += 1;
                    continue;
                }
                // Not consecutive; flush the pending batch before starting a new one.
                let size = pages_to_bytes(pending_pages);
                // Write with pwrite(2) syscall instead of copying contents to mmap because write
                // syscall is more explicit for kernel how many pages are going to be written while
                // mmap only knows each page to be written on a page fault basis.
                self.file
                    .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
                mem_slice = &mem_slice[size..];
            }
            pending_idx_file = Some(idx_file);
            pending_pages = 1;
        }
        // Flush the final batch.
        if let Some(pending_idx_file) = pending_idx_file {
            let size = pages_to_bytes(pending_pages);
            self.file
                .write_all_at(&mem_slice[..size], pages_to_bytes(pending_idx_file) as u64)?;
            mem_slice = &mem_slice[size..];
        }
        if !mem_slice.is_empty() {
            unreachable!("mem_slice must be all consumed");
        }

        Ok(())
    }
594
595    /// Returns the first range of indices of consecutive pages present in the swap file.
596    ///
597    /// # Arguments
598    ///
599    /// * `max_pages` - the max size of the returned chunk even if the chunk of consecutive present
600    ///   pages is longer than this.
601    pub fn first_data_range(&mut self, max_pages: usize) -> Option<Range<usize>> {
602        if let Some((idx_file_range, head_idx_page)) = self.file_states.find_present_pages_range(
603            self.min_possible_present_idx_file,
604            &self.page_states,
605            max_pages,
606            true,
607        ) {
608            self.min_possible_present_idx_file = idx_file_range.start;
609            let idx_page_range =
610                head_idx_page..head_idx_page + idx_file_range.end - idx_file_range.start;
611            Some(idx_page_range)
612        } else {
613            self.min_possible_present_idx_file = self.file_states.len();
614            None
615        }
616    }
617
618    /// Returns the [VolatileSlice] corresponding to the indices regardless of whether the pages are
619    /// present or not.
620    ///
621    /// If the range is out of the region, this returns [Error::OutOfRange].
622    ///
623    /// # Arguments
624    ///
625    /// * `idx_page_range` - the indices of the pages. All the pages must be present and consecutive
626    ///   in the compacted file.
627    pub fn get_slice(&self, idx_page_range: Range<usize>) -> Result<VolatileSlice> {
628        let idx_file_range = self.convert_idx_page_range_to_idx_file(idx_page_range)?;
629        match self.file_mmap.get_slice(
630            pages_to_bytes(idx_file_range.start),
631            pages_to_bytes(idx_file_range.end - idx_file_range.start),
632        ) {
633            Ok(slice) => Ok(slice),
634            Err(VolatileMemoryError::OutOfBounds { .. }) => Err(Error::OutOfRange),
635            Err(e) => Err(e.into()),
636        }
637    }
638
639    /// Returns the count of present pages in the swap file.
640    pub fn present_pages(&self) -> usize {
641        self.page_states
642            .iter()
643            .map(|state| state.is_present() as usize)
644            .sum()
645    }
646
    /// Convert the index range to corresponding index range of compacted file.
    ///
    /// This validates that the `idx_page_range` satisfy:
    ///
    /// * `idx_page_range` has corresponding page in the file.
    /// * corresponding index range in the file is consecutive.
    fn convert_idx_page_range_to_idx_file(
        &self,
        idx_page_range: Range<usize>,
    ) -> Result<Range<usize>> {
        // Validate that the idx_range is for consecutive present file pages.
        let state = self
            .page_states
            .get(idx_page_range.start)
            .ok_or(Error::OutOfRange)?;
        if state.is_none() || !state.is_present() {
            return Err(Error::InvalidIndex);
        }
        let Some(head_idx_file) = state.idx_file() else {
            unreachable!("the page is not none.");
        };
        // Each following page must be present and map to the next file page after the head.
        let mut idx_file = head_idx_file;
        for idx in idx_page_range.start + 1..idx_page_range.end {
            let state = self.page_states.get(idx).ok_or(Error::OutOfRange)?;
            idx_file += 1;
            if state.is_none()
                || !state.is_present()
                || state
                    .idx_file()
                    .unwrap_or_else(|| unreachable!("the page is not none."))
                    != idx_file
            {
                return Err(Error::InvalidIndex);
            }
        }
        let idx_file_range =
            head_idx_file..head_idx_file + idx_page_range.end - idx_page_range.start;
        Ok(idx_file_range)
    }
686}
687
688#[cfg(test)]
689mod tests {
690    use std::slice;
691
692    use base::pagesize;
693    use base::sys::FileDataIterator;
694
695    use super::*;
696
697    #[test]
698    fn new_success() {
699        let file = tempfile::tempfile().unwrap();
700
701        assert_eq!(SwapFile::new(&file, 200).is_ok(), true);
702    }
703
704    #[test]
705    fn len() {
706        let file = tempfile::tempfile().unwrap();
707        let swap_file = SwapFile::new(&file, 200).unwrap();
708
709        assert_eq!(swap_file.page_states.len(), 200);
710    }
711
712    #[test]
713    fn page_content_default_is_none() {
714        let file = tempfile::tempfile().unwrap();
715        let swap_file = SwapFile::new(&file, 200).unwrap();
716
717        assert_eq!(swap_file.page_content(0, false).unwrap().is_none(), true);
718    }
719
720    #[test]
721    fn page_content_returns_content() {
722        let file = tempfile::tempfile().unwrap();
723        let mut swap_file = SwapFile::new(&file, 200).unwrap();
724
725        let data = &vec![1; pagesize()];
726        swap_file.write_to_file(0, data).unwrap();
727
728        let page = swap_file.page_content(0, false).unwrap().unwrap();
729        // TODO(b/315998194): Add safety comment
730        #[allow(clippy::undocumented_unsafe_blocks)]
731        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
732        assert_eq!(result, data);
733    }
734
735    #[test]
736    fn page_content_out_of_range() {
737        let file = tempfile::tempfile().unwrap();
738        let swap_file = SwapFile::new(&file, 200).unwrap();
739
740        assert_eq!(swap_file.page_content(199, false).is_ok(), true);
741        match swap_file.page_content(200, false) {
742            Err(Error::OutOfRange) => {}
743            _ => unreachable!("not out of range"),
744        }
745    }
746
747    fn assert_page_content(swap_file: &SwapFile, idx: usize, data: &[u8]) {
748        let page = swap_file.page_content(idx, false).unwrap().unwrap();
749        // TODO(b/315998194): Add safety comment
750        #[allow(clippy::undocumented_unsafe_blocks)]
751        let result = unsafe { slice::from_raw_parts(page.as_ptr(), pagesize()) };
752        assert_eq!(result, data);
753    }
754
755    #[test]
756    fn write_to_file_swap_file() {
757        let file = tempfile::tempfile().unwrap();
758        let mut swap_file = SwapFile::new(&file, 200).unwrap();
759
760        let buf1 = &vec![1; pagesize()];
761        let buf2 = &vec![2; 2 * pagesize()];
762        swap_file.write_to_file(0, buf1).unwrap();
763        swap_file.write_to_file(2, buf2).unwrap();
764
765        // page_content()
766        assert_page_content(&swap_file, 0, buf1);
767        assert_page_content(&swap_file, 2, &buf2[0..pagesize()]);
768        assert_page_content(&swap_file, 3, &buf2[pagesize()..2 * pagesize()]);
769    }
770
771    #[test]
772    fn write_to_file_invalid_size() {
773        let file = tempfile::tempfile().unwrap();
774        let mut swap_file = SwapFile::new(&file, 200).unwrap();
775
776        let buf = &vec![1; pagesize() + 1];
777        match swap_file.write_to_file(0, buf) {
778            Err(Error::InvalidSize) => {}
779            _ => unreachable!("not invalid size"),
780        };
781    }
782
783    #[test]
784    fn write_to_file_out_of_range() {
785        let file = tempfile::tempfile().unwrap();
786        let mut swap_file = SwapFile::new(&file, 200).unwrap();
787
788        let buf1 = &vec![1; pagesize()];
789        let buf2 = &vec![2; 2 * pagesize()];
790        match swap_file.write_to_file(200, buf1) {
791            Err(Error::OutOfRange) => {}
792            _ => unreachable!("not out of range"),
793        };
794        match swap_file.write_to_file(199, buf2) {
795            Err(Error::OutOfRange) => {}
796            _ => unreachable!("not out of range"),
797        };
798    }
799
800    #[test]
801    fn write_to_file_overwrite() {
802        let file = tempfile::tempfile().unwrap();
803        let mut swap_file = SwapFile::new(&file, 200).unwrap();
804
805        swap_file.write_to_file(0, &vec![1; pagesize()]).unwrap();
806        swap_file
807            .write_to_file(2, &vec![2; 2 * pagesize()])
808            .unwrap();
809
810        let mut buf = vec![0; 3 * pagesize()];
811        buf[..pagesize()].fill(3);
812        buf[pagesize()..2 * pagesize()].fill(4);
813        buf[2 * pagesize()..3 * pagesize()].fill(5);
814        swap_file.write_to_file(0, &buf).unwrap();
815
816        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
817        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
818        assert_page_content(&swap_file, 2, &vec![5; pagesize()]);
819        assert_page_content(&swap_file, 3, &vec![2; pagesize()]);
820        assert!(swap_file.page_content(4, false).unwrap().is_none());
821
822        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
823            .collect::<std::result::Result<Vec<_>, _>>();
824        assert_eq!(data, Ok(vec![0..4 * pagesize() as u64]));
825
826        buf[..pagesize()].fill(6);
827        buf[pagesize()..2 * pagesize()].fill(7);
828        buf[2 * pagesize()..3 * pagesize()].fill(8);
829        swap_file.write_to_file(2, &buf).unwrap();
830        assert_page_content(&swap_file, 0, &vec![3; pagesize()]);
831        assert_page_content(&swap_file, 1, &vec![4; pagesize()]);
832        assert_page_content(&swap_file, 2, &vec![6; pagesize()]);
833        assert_page_content(&swap_file, 3, &vec![7; pagesize()]);
834        assert_page_content(&swap_file, 4, &vec![8; pagesize()]);
835        assert!(swap_file.page_content(5, false).unwrap().is_none());
836
837        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
838            .collect::<std::result::Result<Vec<_>, _>>();
839        assert_eq!(data, Ok(vec![0..5 * pagesize() as u64]));
840    }
841
842    #[test]
843    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
844    fn lock_and_start_populate() {
845        let file = tempfile::tempfile().unwrap();
846        let mut swap_file = SwapFile::new(&file, 200).unwrap();
847
848        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
849        swap_file
850            .write_to_file(3, &vec![1; 5 * pagesize()])
851            .unwrap();
852        swap_file.write_to_file(10, &vec![1; pagesize()]).unwrap();
853
854        let mut locked_pages = 0;
855        loop {
856            let pages = swap_file.lock_and_async_prefetch(2).unwrap();
857            if pages == 0 {
858                break;
859            }
860            assert!(pages <= 2);
861            locked_pages += pages;
862        }
863        assert_eq!(locked_pages, 7);
864    }
865
866    #[test]
867    fn clear_range() {
868        let file = tempfile::tempfile().unwrap();
869        let mut swap_file = SwapFile::new(&file, 200).unwrap();
870
871        let data = &vec![1; pagesize()];
872        swap_file.write_to_file(0, data).unwrap();
873        swap_file.clear_range(0..1).unwrap();
874
875        assert!(swap_file.page_content(0, false).unwrap().is_none());
876    }
877
878    #[test]
879    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
880    fn clear_range_unlocked_pages() {
881        let file = tempfile::tempfile().unwrap();
882        let mut swap_file = SwapFile::new(&file, 200).unwrap();
883
884        swap_file
885            .write_to_file(1, &vec![1; 10 * pagesize()])
886            .unwrap();
887        // 1..6 is locked, 6..11 is not locked.
888        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);
889
890        // locked pages only
891        assert_eq!(swap_file.clear_range(1..4).unwrap(), 3);
892        // locked pages + non-locked pages
893        assert_eq!(swap_file.clear_range(4..7).unwrap(), 2);
894        // non-locked pages
895        assert_eq!(swap_file.clear_range(10..11).unwrap(), 0);
896    }
897
898    #[test]
899    fn clear_range_keep_on_disk() {
900        let file = tempfile::tempfile().unwrap();
901        let mut swap_file = SwapFile::new(&file, 200).unwrap();
902
903        let data = &vec![1; pagesize()];
904        swap_file.write_to_file(0, data).unwrap();
905        swap_file.clear_range(0..1).unwrap();
906
907        let slice = swap_file.page_content(0, true).unwrap().unwrap();
908        // TODO(b/315998194): Add safety comment
909        #[allow(clippy::undocumented_unsafe_blocks)]
910        let slice = unsafe { slice::from_raw_parts(slice.as_ptr(), slice.size()) };
911        assert_eq!(slice, data);
912    }
913
914    #[test]
915    fn clear_range_out_of_range() {
916        let file = tempfile::tempfile().unwrap();
917        let mut swap_file = SwapFile::new(&file, 200).unwrap();
918        swap_file.write_to_file(199, &vec![0; pagesize()]).unwrap();
919
920        match swap_file.clear_range(199..201) {
921            Err(Error::OutOfRange) => {}
922            _ => unreachable!("not out of range"),
923        };
924        assert!(swap_file.clear_range(199..200).is_ok());
925        match swap_file.clear_range(200..201) {
926            Err(Error::OutOfRange) => {}
927            _ => unreachable!("not out of range"),
928        };
929    }
930
931    #[test]
932    fn free_range() {
933        let file = tempfile::tempfile().unwrap();
934        let mut swap_file = SwapFile::new(&file, 200).unwrap();
935
936        let data = &vec![1; pagesize()];
937        swap_file.write_to_file(0, data).unwrap();
938        swap_file.free_range(0..1).unwrap();
939
940        assert!(swap_file.page_content(0, false).unwrap().is_none());
941        assert!(swap_file.page_content(0, true).unwrap().is_none());
942    }
943
944    #[test]
945    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
946    fn free_range_unlocked_pages() {
947        let file = tempfile::tempfile().unwrap();
948        let mut swap_file = SwapFile::new(&file, 200).unwrap();
949
950        swap_file
951            .write_to_file(1, &vec![1; 10 * pagesize()])
952            .unwrap();
953        // 1..6 is locked, 6..11 is not locked.
954        assert_eq!(swap_file.lock_and_async_prefetch(5).unwrap(), 5);
955
956        // empty pages
957        assert_eq!(swap_file.free_range(0..1).unwrap(), 0);
958        // empty pages + locked pages
959        assert_eq!(swap_file.free_range(0..2).unwrap(), 1);
960        // locked pages only
961        assert_eq!(swap_file.free_range(2..4).unwrap(), 2);
962        // empty pages + locked pages + non-locked pages
963        assert_eq!(swap_file.free_range(3..7).unwrap(), 2);
964        // non-locked pages
965        assert_eq!(swap_file.free_range(10..11).unwrap(), 0);
966    }
967
968    #[test]
969    fn free_range_out_of_range() {
970        let file = tempfile::tempfile().unwrap();
971        let mut swap_file = SwapFile::new(&file, 200).unwrap();
972
973        assert_eq!(swap_file.free_range(199..200).is_ok(), true);
974        match swap_file.free_range(200..201) {
975            Err(Error::OutOfRange) => {}
976            _ => unreachable!("not out of range"),
977        };
978        match swap_file.free_range(199..201) {
979            Err(Error::OutOfRange) => {}
980            _ => unreachable!("not out of range"),
981        };
982    }
983
984    #[test]
985    fn free_range_and_write() {
986        let file = tempfile::tempfile().unwrap();
987        let mut swap_file = SwapFile::new(&file, 200).unwrap();
988
989        let data = &vec![1; 5 * pagesize()];
990        swap_file.write_to_file(0, data).unwrap();
991        swap_file.free_range(0..5).unwrap();
992
993        swap_file
994            .write_to_file(0, &vec![2; 2 * pagesize()])
995            .unwrap();
996        swap_file
997            .write_to_file(5, &vec![3; 4 * pagesize()])
998            .unwrap();
999
1000        assert_page_content(&swap_file, 0, &vec![2; pagesize()]);
1001        assert_page_content(&swap_file, 1, &vec![2; pagesize()]);
1002        assert!(swap_file.page_content(2, true).unwrap().is_none());
1003        assert!(swap_file.page_content(3, true).unwrap().is_none());
1004        assert!(swap_file.page_content(4, true).unwrap().is_none());
1005        assert_page_content(&swap_file, 5, &vec![3; pagesize()]);
1006        assert_page_content(&swap_file, 6, &vec![3; pagesize()]);
1007        assert_page_content(&swap_file, 7, &vec![3; pagesize()]);
1008        assert_page_content(&swap_file, 8, &vec![3; pagesize()]);
1009        assert!(swap_file.page_content(9, true).unwrap().is_none());
1010
1011        let data = FileDataIterator::new(&file, 0, file.metadata().unwrap().len())
1012            .collect::<std::result::Result<Vec<_>, _>>();
1013        assert_eq!(data, Ok(vec![0..6 * pagesize() as u64]));
1014    }
1015
1016    #[test]
1017    #[cfg(target_arch = "x86_64")] // TODO(b/272612118): unit test infra (qemu-user) support
1018    fn clear_mlock() {
1019        let file = tempfile::tempfile().unwrap();
1020        let mut swap_file = SwapFile::new(&file, 200).unwrap();
1021
1022        swap_file
1023            .write_to_file(1, &vec![1; 10 * pagesize()])
1024            .unwrap();
1025        // success if there is no mlock.
1026        assert!(swap_file.clear_mlock().is_ok());
1027
1028        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
1029        // success if there is mlocked area.
1030        assert!(swap_file.clear_mlock().is_ok());
1031
1032        // mlock area is cleared.
1033        assert_eq!(swap_file.lock_and_async_prefetch(11).unwrap(), 10);
1034    }
1035
1036    #[test]
1037    fn first_data_range() {
1038        let file = tempfile::tempfile().unwrap();
1039        let mut swap_file = SwapFile::new(&file, 200).unwrap();
1040
1041        swap_file
1042            .write_to_file(1, &vec![1; 2 * pagesize()])
1043            .unwrap();
1044        swap_file.write_to_file(3, &vec![2; pagesize()]).unwrap();
1045
1046        assert_eq!(swap_file.first_data_range(200).unwrap(), 1..4);
1047        assert_eq!(swap_file.first_data_range(2).unwrap(), 1..3);
1048        assert_eq!(swap_file.first_data_range(1).unwrap(), 1..2);
1049        swap_file.clear_range(1..3).unwrap();
1050        assert_eq!(swap_file.first_data_range(2).unwrap(), 3..4);
1051        swap_file.clear_range(3..4).unwrap();
1052        assert!(swap_file.first_data_range(2).is_none());
1053    }
1054
1055    #[test]
1056    fn get_slice() {
1057        let file = tempfile::tempfile().unwrap();
1058        let mut swap_file = SwapFile::new(&file, 200).unwrap();
1059
1060        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
1061        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();
1062
1063        let slice = swap_file.get_slice(1..3).unwrap();
1064        assert_eq!(slice.size(), 2 * pagesize());
1065        let mut buf = vec![0u8; pagesize()];
1066        slice.get_slice(0, pagesize()).unwrap().copy_to(&mut buf);
1067        assert_eq!(buf, vec![1; pagesize()]);
1068
1069        let mut buf = vec![0u8; pagesize()];
1070        slice
1071            .get_slice(pagesize(), pagesize())
1072            .unwrap()
1073            .copy_to(&mut buf);
1074        assert_eq!(buf, vec![2; pagesize()]);
1075    }
1076
1077    #[test]
1078    fn get_slice_out_of_range() {
1079        let file = tempfile::tempfile().unwrap();
1080        let swap_file = SwapFile::new(&file, 200).unwrap();
1081
1082        match swap_file.get_slice(200..201) {
1083            Err(Error::OutOfRange) => {}
1084            other => {
1085                unreachable!("unexpected result {:?}", other);
1086            }
1087        }
1088    }
1089
1090    #[test]
1091    fn present_pages() {
1092        let file = tempfile::tempfile().unwrap();
1093        let mut swap_file = SwapFile::new(&file, 200).unwrap();
1094
1095        swap_file.write_to_file(1, &vec![1; pagesize()]).unwrap();
1096        swap_file.write_to_file(2, &vec![2; pagesize()]).unwrap();
1097
1098        assert_eq!(swap_file.present_pages(), 2);
1099    }
1100}