devices/virtio/
pmem.rs

1// Copyright 2019 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::collections::BTreeMap;
6use std::fs::File;
7use std::io;
8use std::mem::size_of;
9use std::time::Duration;
10
11use anyhow::anyhow;
12use anyhow::Context;
13use base::error;
14use base::AsRawDescriptor;
15use base::Error as SysError;
16use base::Event;
17use base::RawDescriptor;
18use base::Result as SysResult;
19use base::Timer;
20use base::Tube;
21use base::TubeError;
22use base::WorkerThread;
23use cros_async::select2;
24use cros_async::select3;
25use cros_async::AsyncError;
26use cros_async::EventAsync;
27use cros_async::Executor;
28use cros_async::TimerAsync;
29use data_model::Le32;
30use data_model::Le64;
31use futures::pin_mut;
32use remain::sorted;
33use snapshot::AnySnapshot;
34use thiserror::Error;
35use vm_control::MemSlot;
36use vm_control::VmMemoryMappingRequest;
37use vm_control::VmMemoryMappingResponse;
38use vm_memory::GuestAddress;
39use vm_memory::GuestMemory;
40use zerocopy::FromBytes;
41use zerocopy::Immutable;
42use zerocopy::IntoBytes;
43use zerocopy::KnownLayout;
44
45use super::async_utils;
46use super::copy_config;
47use super::DescriptorChain;
48use super::DeviceType;
49use super::Interrupt;
50use super::Queue;
51use super::VirtioDevice;
52
// Single request queue with 256 entries.
const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

/* Feature bits */
// NOTE(review): bit 63 is at the top of the 64-bit feature space; presumably a
// crosvm-specific extension rather than a spec-defined virtio-pmem feature —
// confirm against the virtio spec / guest driver.
const VIRTIO_PMEM_F_DISCARD: u32 = 63;

const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
// Discard uses u32::MAX as its request type; paired with
// VIRTIO_PMEM_F_DISCARD above (non-spec request — see note there).
const VIRTIO_PMEM_REQ_TYPE_DISCARD: u32 = u32::MAX;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
63
/// Virtio config space layout: guest physical start address and size of the
/// mapped region, both little-endian (read back in `read_config`).
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_config {
    start_address: Le64,
    size: Le64,
}
70
/// Response written back to the guest; status is one of
/// `VIRTIO_PMEM_RESP_TYPE_*`.
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_resp {
    status_code: Le32,
}
76
/// Short request carrying only a request type (used for flush requests).
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_req {
    type_: Le32,
}
82
/// Extended request with an address range; distinguished from
/// `virtio_pmem_req` by descriptor length in `handle_request` (used by the
/// discard request).
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_range_req {
    type_: Le32,
    padding_: Le32,
    start_address: Le64,
    size: Le64,
}
91
// Failure modes of the pmem worker; `#[sorted]` enforces alphabetical
// variant order at compile time.
#[sorted]
#[derive(Error, Debug)]
enum Error {
    /// Failed to get value from pageout timer.
    #[error("failed to get value from pageout timer: {0}")]
    PageoutTimer(AsyncError),
    /// Failed to read from virtqueue.
    #[error("failed to read from virtqueue: {0}")]
    ReadQueue(io::Error),
    /// Failed to receive tube response.
    #[error("failed to receive tube response: {0}")]
    ReceiveResponse(TubeError),
    /// Failed to send tube request.
    #[error("failed to send tube request: {0}")]
    SendingRequest(TubeError),
    /// Failed to write to virtqueue.
    #[error("failed to write to virtqueue: {0}")]
    WriteQueue(io::Error),
}
111
112type Result<T> = ::std::result::Result<T, Error>;
113
114async fn pageout(
115    ex: &Executor,
116    swap_interval: Duration,
117    pmem_device_tube: &Tube,
118    mapping_arena_slot: u32,
119    mapping_size: usize,
120) -> Result<()> {
121    let timer = Timer::new().expect("Failed to create a timer");
122    let mut pageout_timer =
123        TimerAsync::new(timer, ex).expect("Failed to create an async pageout timer");
124    pageout_timer
125        .reset_repeating(swap_interval)
126        .expect("Failed to reset pageout timer");
127
128    loop {
129        pageout_timer.wait().await.map_err(Error::PageoutTimer)?;
130        let request = VmMemoryMappingRequest::MadvisePageout {
131            slot: mapping_arena_slot,
132            offset: 0,
133            size: mapping_size,
134        };
135
136        pmem_device_tube
137            .send(&request)
138            .map_err(Error::SendingRequest)?;
139        match pmem_device_tube
140            .recv::<VmMemoryMappingResponse>()
141            .map_err(Error::ReceiveResponse)?
142        {
143            VmMemoryMappingResponse::Ok => {}
144            VmMemoryMappingResponse::Err(e) => {
145                error!("failed to page out the memory mapping: {}", e);
146            }
147        };
148    }
149}
150
151fn execute_request(
152    request_type: u32,
153    start_address: u64,
154    size: u64,
155    pmem_device_tube: &Tube,
156    mapping_arena_slot: u32,
157    mapping_size: usize,
158) -> u32 {
159    match request_type {
160        VIRTIO_PMEM_REQ_TYPE_FLUSH => {
161            let request = VmMemoryMappingRequest::MsyncArena {
162                slot: mapping_arena_slot,
163                offset: 0, // The pmem backing file is always at offset 0 in the arena.
164                size: mapping_size,
165            };
166
167            if let Err(e) = pmem_device_tube.send(&request) {
168                error!("failed to send request: {}", e);
169                return VIRTIO_PMEM_RESP_TYPE_EIO;
170            }
171
172            match pmem_device_tube.recv() {
173                Ok(response) => match response {
174                    VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
175                    VmMemoryMappingResponse::Err(e) => {
176                        error!("failed flushing disk image: {}", e);
177                        VIRTIO_PMEM_RESP_TYPE_EIO
178                    }
179                },
180                Err(e) => {
181                    error!("failed to receive data: {}", e);
182                    VIRTIO_PMEM_RESP_TYPE_EIO
183                }
184            }
185        }
186
187        VIRTIO_PMEM_REQ_TYPE_DISCARD => {
188            let request = VmMemoryMappingRequest::MadviseRemove {
189                slot: mapping_arena_slot,
190                offset: usize::try_from(start_address).unwrap(),
191                size: usize::try_from(size).unwrap(),
192            };
193
194            if let Err(e) = pmem_device_tube.send(&request) {
195                error!("failed to send request: {}", e);
196                return VIRTIO_PMEM_RESP_TYPE_EIO;
197            }
198
199            match pmem_device_tube.recv() {
200                Ok(response) => match response {
201                    VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
202                    VmMemoryMappingResponse::Err(e) => {
203                        error!("failed to discard memory range: {}", e);
204                        VIRTIO_PMEM_RESP_TYPE_EIO
205                    }
206                },
207                Err(e) => {
208                    error!("failed to receive data: {}", e);
209                    VIRTIO_PMEM_RESP_TYPE_EIO
210                }
211            }
212        }
213
214        _ => {
215            error!("unknown request type: {}", request_type);
216            VIRTIO_PMEM_RESP_TYPE_EIO
217        }
218    }
219}
220
221fn handle_request(
222    avail_desc: &mut DescriptorChain,
223    pmem_device_tube: &Tube,
224    mapping_arena_slot: u32,
225    mapping_size: usize,
226) -> Result<usize> {
227    let (request_type, start_address, size) =
228        if avail_desc.reader.available_bytes() == size_of::<virtio_pmem_req>() {
229            let request = avail_desc
230                .reader
231                .read_obj::<virtio_pmem_req>()
232                .map_err(Error::ReadQueue)?;
233            (request.type_.to_native(), 0, 0)
234        } else {
235            let request = avail_desc
236                .reader
237                .read_obj::<virtio_pmem_range_req>()
238                .map_err(Error::ReadQueue)?;
239            (
240                request.type_.to_native(),
241                request.start_address.to_native(),
242                request.size.to_native(),
243            )
244        };
245    let status_code = execute_request(
246        request_type,
247        start_address,
248        size,
249        pmem_device_tube,
250        mapping_arena_slot,
251        mapping_size,
252    );
253
254    let response = virtio_pmem_resp {
255        status_code: status_code.into(),
256    };
257
258    avail_desc
259        .writer
260        .write_obj(response)
261        .map_err(Error::WriteQueue)?;
262
263    Ok(avail_desc.writer.bytes_written())
264}
265
266async fn handle_queue(
267    queue: &mut Queue,
268    mut queue_event: EventAsync,
269    pmem_device_tube: &Tube,
270    mapping_arena_slot: u32,
271    mapping_size: usize,
272) {
273    loop {
274        let mut avail_desc = match queue.next_async(&mut queue_event).await {
275            Err(e) => {
276                error!("Failed to read descriptor {}", e);
277                return;
278            }
279            Ok(d) => d,
280        };
281
282        let written = match handle_request(
283            &mut avail_desc,
284            pmem_device_tube,
285            mapping_arena_slot,
286            mapping_size,
287        ) {
288            Ok(n) => n,
289            Err(e) => {
290                error!("pmem: failed to handle request: {}", e);
291                0
292            }
293        };
294        queue.add_used_with_bytes_written(avail_desc, written as u32);
295        queue.trigger_interrupt();
296    }
297}
298
299fn run_worker(
300    queue: &mut Queue,
301    pmem_device_tube: &Tube,
302    kill_evt: Event,
303    mapping_arena_slot: u32,
304    mapping_size: usize,
305    swap_interval: Option<Duration>,
306) {
307    let ex = Executor::new().unwrap();
308
309    let queue_evt = queue
310        .event()
311        .try_clone()
312        .expect("failed to clone queue event");
313    let queue_evt = EventAsync::new(queue_evt, &ex).expect("failed to set up the queue event");
314
315    // Process requests from the virtio queue.
316    let queue_fut = handle_queue(
317        queue,
318        queue_evt,
319        pmem_device_tube,
320        mapping_arena_slot,
321        mapping_size,
322    );
323    pin_mut!(queue_fut);
324
325    // Exit if the kill event is triggered.
326    let kill = async_utils::await_and_exit(&ex, kill_evt);
327    pin_mut!(kill);
328
329    let interval = swap_interval.unwrap_or(Duration::ZERO);
330    if interval.is_zero() {
331        if let Err(e) = ex.run_until(select2(queue_fut, kill)) {
332            error!("error happened in executor: {}", e);
333        }
334    } else {
335        let pageout_fut = pageout(
336            &ex,
337            interval,
338            pmem_device_tube,
339            mapping_arena_slot,
340            mapping_size,
341        );
342        pin_mut!(pageout_fut);
343        if let Err(e) = ex.run_until(select3(queue_fut, kill, pageout_fut)) {
344            error!("error happened in executor: {}", e);
345        }
346    }
347}
348
/// Specifies how memory slot is initialized.
pub enum MemSlotConfig {
    /// The memory region has already been mapped to the guest.
    MemSlot {
        /// index of the guest-mapped memory regions.
        idx: MemSlot,
    },
    /// The memory region that is not initialized yet and whose slot index will be provided via
    /// `Tube` later. e.g. pmem-ext2 device, where fs construction will be done in the main
    /// process. The slot index is received from this tube in `activate`.
    LazyInit { tube: Tube },
}
361
/// Virtio persistent-memory device state.
pub struct Pmem {
    // Running worker; yields the queue and control tube back on stop.
    worker_thread: Option<WorkerThread<(Queue, Tube)>>,
    // Virtio feature bits offered to the guest.
    features: u64,
    // Backing file for the mapping, if file-backed; kept only for keep_rds().
    disk_image: Option<File>,
    // Guest physical address where the region is mapped.
    mapping_address: GuestAddress,
    // How the memory slot backing the mapping is obtained.
    mem_slot: MemSlotConfig,
    // Size of the mapping in bytes (checked to fit in usize in `new`).
    mapping_size: u64,
    // Tube to the main process; `None` while the worker thread owns it.
    pmem_device_tube: Option<Tube>,
    // Optional period for the pageout task (`None`/zero disables it).
    swap_interval: Option<Duration>,
}
372
// Snapshot payload: only the mapping geometry is recorded; on restore it is
// validated against the current configuration rather than applied.
#[derive(serde::Serialize, serde::Deserialize)]
struct PmemSnapshot {
    mapping_address: GuestAddress,
    mapping_size: u64,
}
378
/// Configuration of a virtio-pmem device.
pub struct PmemConfig {
    /// Disk image exposed to the guest.
    /// If the memory region is not backed by a file, this should be `None`.
    pub disk_image: Option<File>,
    /// Guest physical address where the memory will be mapped.
    pub mapping_address: GuestAddress,
    /// How the backing memory slot is provided (pre-mapped or lazily over a tube).
    pub mem_slot: MemSlotConfig,
    /// The size of the mapped region.
    pub mapping_size: u64,
    /// A communication channel to the main process to send memory requests.
    pub pmem_device_tube: Tube,
    /// Interval for periodic swap out of memory mapping
    pub swap_interval: Option<Duration>,
    /// Whether the region is writable or not.
    pub mapping_writable: bool,
}
396
397impl Pmem {
398    pub fn new(base_features: u64, cfg: PmemConfig) -> SysResult<Pmem> {
399        if cfg.mapping_size > usize::MAX as u64 {
400            return Err(SysError::new(libc::EOVERFLOW));
401        }
402
403        let mut avail_features = base_features;
404        if cfg.mapping_writable {
405            if let MemSlotConfig::LazyInit { .. } = cfg.mem_slot {
406                error!("pmem-ext2 must be a read-only device");
407                return Err(SysError::new(libc::EINVAL));
408            }
409
410            avail_features |= 1 << VIRTIO_PMEM_F_DISCARD;
411        }
412
413        Ok(Pmem {
414            worker_thread: None,
415            features: avail_features,
416            disk_image: cfg.disk_image,
417            mapping_address: cfg.mapping_address,
418            mem_slot: cfg.mem_slot,
419            mapping_size: cfg.mapping_size,
420            pmem_device_tube: Some(cfg.pmem_device_tube),
421            swap_interval: cfg.swap_interval,
422        })
423    }
424}
425
426impl VirtioDevice for Pmem {
427    fn keep_rds(&self) -> Vec<RawDescriptor> {
428        let mut keep_rds = Vec::new();
429        if let Some(disk_image) = &self.disk_image {
430            keep_rds.push(disk_image.as_raw_descriptor());
431        }
432
433        if let Some(ref pmem_device_tube) = self.pmem_device_tube {
434            keep_rds.push(pmem_device_tube.as_raw_descriptor());
435        }
436
437        if let MemSlotConfig::LazyInit { tube } = &self.mem_slot {
438            keep_rds.push(tube.as_raw_descriptor());
439        }
440
441        keep_rds
442    }
443
444    fn device_type(&self) -> DeviceType {
445        DeviceType::Pmem
446    }
447
448    fn queue_max_sizes(&self) -> &[u16] {
449        QUEUE_SIZES
450    }
451
452    fn features(&self) -> u64 {
453        self.features
454    }
455
456    fn read_config(&self, offset: u64, data: &mut [u8]) {
457        let config = virtio_pmem_config {
458            start_address: Le64::from(self.mapping_address.offset()),
459            size: Le64::from(self.mapping_size),
460        };
461        copy_config(data, 0, config.as_bytes(), offset);
462    }
463
464    fn activate(
465        &mut self,
466        _memory: GuestMemory,
467        _interrupt: Interrupt,
468        mut queues: BTreeMap<usize, Queue>,
469    ) -> anyhow::Result<()> {
470        if queues.len() != 1 {
471            return Err(anyhow!("expected 1 queue, got {}", queues.len()));
472        }
473
474        let mut queue = queues.remove(&0).unwrap();
475
476        // We checked that this fits in a usize in `Pmem::new`.
477        let mapping_size = self.mapping_size as usize;
478
479        let pmem_device_tube = self
480            .pmem_device_tube
481            .take()
482            .context("missing pmem device tube")?;
483
484        let swap_interval = self.swap_interval;
485
486        let mapping_arena_slot = match &self.mem_slot {
487            MemSlotConfig::MemSlot { idx } => *idx,
488            MemSlotConfig::LazyInit { tube } => tube
489                .recv::<u32>()
490                .context("failed to receive memory slot for ext2 pmem device")?,
491        };
492
493        self.worker_thread = Some(WorkerThread::start("v_pmem", move |kill_event| {
494            run_worker(
495                &mut queue,
496                &pmem_device_tube,
497                kill_event,
498                mapping_arena_slot,
499                mapping_size,
500                swap_interval,
501            );
502            (queue, pmem_device_tube)
503        }));
504
505        Ok(())
506    }
507
508    fn reset(&mut self) -> anyhow::Result<()> {
509        if let Some(worker_thread) = self.worker_thread.take() {
510            let (_queue, pmem_device_tube) = worker_thread.stop();
511            self.pmem_device_tube = Some(pmem_device_tube);
512        }
513        Ok(())
514    }
515
516    fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> {
517        if let Some(worker_thread) = self.worker_thread.take() {
518            let (queue, pmem_device_tube) = worker_thread.stop();
519            self.pmem_device_tube = Some(pmem_device_tube);
520            return Ok(Some(BTreeMap::from([(0, queue)])));
521        }
522        Ok(None)
523    }
524
525    fn virtio_wake(
526        &mut self,
527        queues_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>,
528    ) -> anyhow::Result<()> {
529        if let Some((mem, interrupt, queues)) = queues_state {
530            self.activate(mem, interrupt, queues)?;
531        }
532        Ok(())
533    }
534
535    fn virtio_snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
536        AnySnapshot::to_any(PmemSnapshot {
537            mapping_address: self.mapping_address,
538            mapping_size: self.mapping_size,
539        })
540        .context("failed to serialize pmem snapshot")
541    }
542
543    fn virtio_restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
544        let snapshot: PmemSnapshot =
545            AnySnapshot::from_any(data).context("failed to deserialize pmem snapshot")?;
546        anyhow::ensure!(
547            snapshot.mapping_address == self.mapping_address
548                && snapshot.mapping_size == self.mapping_size,
549            "pmem snapshot doesn't match config: expected {:?}, got {:?}",
550            (self.mapping_address, self.mapping_size),
551            (snapshot.mapping_address, snapshot.mapping_size),
552        );
553        Ok(())
554    }
555}