vm_control/
lib.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Handles IPC for controlling the main VM process.
6//!
7//! The VM Control IPC protocol is synchronous, meaning that each `VmRequest` sent over a connection
8//! will receive a `VmResponse` for that request next time data is received over that connection.
9//!
10//! The wire message format is a little-endian C-struct of fixed size, along with a file descriptor
11//! if the request type expects one.
12
13pub mod api;
14#[cfg(feature = "gdb")]
15pub mod gdb;
16#[cfg(feature = "gpu")]
17pub mod gpu;
18
19use base::debug;
20#[cfg(any(target_os = "android", target_os = "linux"))]
21use base::linux::MemoryMappingBuilderUnix;
22#[cfg(any(target_os = "android", target_os = "linux"))]
23use base::sys::call_with_extended_max_files;
24#[cfg(any(target_os = "android", target_os = "linux"))]
25use base::MemoryMappingArena;
26#[cfg(windows)]
27use base::MemoryMappingBuilderWindows;
28use hypervisor::BalloonEvent;
29use hypervisor::MemCacheType;
30use hypervisor::MemRegion;
31use snapshot::AnySnapshot;
32
33#[cfg(feature = "balloon")]
34mod balloon_tube;
35pub mod client;
36pub mod sys;
37
38#[cfg(target_arch = "x86_64")]
39use std::arch::x86_64::_rdtsc;
40use std::collections::BTreeMap;
41use std::collections::BTreeSet;
42use std::collections::HashMap;
43use std::convert::TryInto;
44use std::fmt;
45use std::fmt::Display;
46use std::fs::File;
47use std::path::Path;
48use std::path::PathBuf;
49use std::result::Result as StdResult;
50use std::str::FromStr;
51use std::sync::mpsc;
52use std::sync::Arc;
53use std::time::Instant;
54
55use anyhow::bail;
56use anyhow::Context;
57use base::error;
58use base::info;
59use base::warn;
60use base::with_as_descriptor;
61use base::AsRawDescriptor;
62use base::Descriptor;
63use base::Error as SysError;
64use base::Event;
65use base::ExternalMapping;
66use base::IntoRawDescriptor;
67use base::MappedRegion;
68use base::MemoryMappingBuilder;
69use base::MmapError;
70use base::Protection;
71use base::Result;
72use base::SafeDescriptor;
73use base::SharedMemory;
74use base::Tube;
75use hypervisor::Datamatch;
76use hypervisor::IoEventAddress;
77use hypervisor::IrqRoute;
78use hypervisor::IrqSource;
79pub use hypervisor::MemSlot;
80use hypervisor::Vm;
81use hypervisor::VmCap;
82use libc::EINVAL;
83use libc::EIO;
84use libc::ENODEV;
85use libc::ENOTSUP;
86use libc::ERANGE;
87#[cfg(feature = "registered_events")]
88use protos::registered_events;
89use remain::sorted;
90use resources::Alloc;
91use resources::SystemAllocator;
92use rutabaga_gfx::DeviceId;
93use rutabaga_gfx::RutabagaDescriptor;
94use rutabaga_gfx::RutabagaFromRawDescriptor;
95use rutabaga_gfx::RutabagaGralloc;
96use rutabaga_gfx::RutabagaHandle;
97use rutabaga_gfx::RutabagaMappedRegion;
98use rutabaga_gfx::VulkanInfo;
99use serde::de::Error;
100use serde::Deserialize;
101use serde::Serialize;
102use snapshot::SnapshotReader;
103use snapshot::SnapshotWriter;
104use swap::SwapStatus;
105use sync::Mutex;
106#[cfg(any(target_os = "android", target_os = "linux"))]
107pub use sys::FsMappingRequest;
108#[cfg(windows)]
109pub use sys::InitialAudioSessionState;
110#[cfg(any(target_os = "android", target_os = "linux"))]
111pub use sys::VmMemoryMappingRequest;
112#[cfg(any(target_os = "android", target_os = "linux"))]
113pub use sys::VmMemoryMappingResponse;
114use thiserror::Error;
115pub use vm_control_product::GpuSendToMain;
116pub use vm_control_product::GpuSendToService;
117pub use vm_control_product::ServiceSendToGpu;
118use vm_memory::GuestAddress;
119
120#[cfg(feature = "balloon")]
121pub use crate::balloon_tube::BalloonControlCommand;
122#[cfg(feature = "balloon")]
123pub use crate::balloon_tube::BalloonTube;
124#[cfg(feature = "gdb")]
125pub use crate::gdb::VcpuDebug;
126#[cfg(feature = "gdb")]
127pub use crate::gdb::VcpuDebugStatus;
128#[cfg(feature = "gdb")]
129pub use crate::gdb::VcpuDebugStatusMessage;
130#[cfg(feature = "gpu")]
131use crate::gpu::GpuControlCommand;
132#[cfg(feature = "gpu")]
133use crate::gpu::GpuControlResult;
134
/// Control the state of a particular VM CPU.
///
/// Variants that carry an `mpsc::Sender` (directly, or inside a `VcpuRestoreRequest`) report
/// their outcome back to the requester over that channel.
#[derive(Clone, Debug)]
pub enum VcpuControl {
    /// Carries a `VcpuDebug` payload from the `gdb` module. Only compiled in when the `gdb`
    /// feature is enabled.
    #[cfg(feature = "gdb")]
    Debug(VcpuDebug),
    /// Carries a `VmRunMode` for the vCPU.
    /// NOTE(review): presumably the run mode the vCPU should switch to — confirm in the vCPU loop.
    RunState(VmRunMode),
    /// NOTE(review): presumably asks for the vCPU thread to be made real-time — confirm.
    MakeRT,
    /// Request the current state of the vCPU. The result is sent back over the included channel.
    GetStates(mpsc::Sender<VmRunMode>),
    /// Request the vcpu write a snapshot of itself to the writer, then send a `Result` back over
    /// the channel after completion/failure.
    Snapshot(SnapshotWriter, mpsc::Sender<anyhow::Result<()>>),
    /// Request a restore as described by the contained `VcpuRestoreRequest`.
    Restore(VcpuRestoreRequest),
    /// Only available on Android and Linux.
    /// NOTE(review): the meaning and unit of the `u32` are not visible in this file — confirm.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    Throttle(u32),
}
151
/// Request to restore a Vcpu from a given snapshot, and report the results
/// back via the provided channel.
#[derive(Clone, Debug)]
pub struct VcpuRestoreRequest {
    /// Channel over which the outcome of the restore is reported.
    pub result_sender: mpsc::Sender<anyhow::Result<()>>,
    /// Reader for the snapshot to restore from.
    pub snapshot_reader: SnapshotReader,
    /// Only present on x86_64.
    /// NOTE(review): presumably a host TSC reading taken at a reference moment, used to adjust
    /// the guest TSC on restore — confirm against the vCPU restore code.
    #[cfg(target_arch = "x86_64")]
    pub host_tsc_reference_moment: u64,
}
161
/// Mode of execution for the VM.
///
/// The `Default` mode is [`VmRunMode::Running`].
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum VmRunMode {
    /// The default run mode indicating the VCPUs are running.
    #[default]
    Running,
    /// Indicates that the VCPUs are suspending execution until the `Running` mode is set.
    Suspending,
    /// Indicates that the VM is exiting all processes.
    Exiting,
    /// Indicates that the VM is in a breakpoint waiting for the debugger to do continue.
    Breakpoint,
}

impl Display for VmRunMode {
    /// Writes the lowercase name of the run mode (for example `running`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Self::Running => "running",
            Self::Suspending => "suspending",
            Self::Exiting => "exiting",
            Self::Breakpoint => "breakpoint",
        };
        f.write_str(name)
    }
}
188
/// Trait for devices that get notification on specific GPE trigger.
///
/// `notify` has an empty default body, so an implementor that does not override it ignores the
/// notification.
pub trait GpeNotify: Send {
    /// Notification hook; does nothing unless overridden.
    fn notify(&mut self) {}
}
193
/// Trait for devices that get notification on specific PCI PME.
///
/// `notify` has an empty default body, so an implementor that does not override it ignores the
/// notification.
pub trait PmeNotify: Send {
    /// Notification hook; receives a `_requester_id` and does nothing unless overridden.
    /// NOTE(review): `_requester_id` is presumably the PCI requester ID of the PME source —
    /// confirm against the caller.
    fn notify(&mut self, _requester_id: u16) {}
}
198
/// Event and registration hooks exposed by a `PmResource` implementor.
///
/// Every method has an empty default body, so implementors only override the hooks they
/// actually handle.
/// NOTE(review): `Pm` presumably stands for power management; the per-method notes below are
/// inferred from the method names — confirm against the implementors.
pub trait PmResource {
    /// Hook for a power button event (name-based; see the note on the trait).
    fn pwrbtn_evt(&mut self) {}
    /// Hook for a sleep button event (name-based; see the note on the trait).
    fn slpbtn_evt(&mut self) {}
    /// Hook for an RTC event; receives an `Event` named `_clear_evt`.
    fn rtc_evt(&mut self, _clear_evt: Event) {}
    /// Hook for an event on GPE number `_gpe`, with an optional `Event` named `_clear_evt`.
    fn gpe_evt(&mut self, _gpe: u32, _clear_evt: Option<Event>) {}
    /// Hook for a PME event identified by `_requester_id`.
    fn pme_evt(&mut self, _requester_id: u16) {}
    /// Hook for registering a `GpeNotify` device for GPE number `_gpe`.
    fn register_gpe_notify_dev(&mut self, _gpe: u32, _notify_dev: Arc<Mutex<dyn GpeNotify>>) {}
    /// Hook for registering a `PmeNotify` device for bus `_bus`.
    fn register_pme_notify_dev(&mut self, _bus: u8, _notify_dev: Arc<Mutex<dyn PmeNotify>>) {}
}
208
/// The maximum number of devices that can be listed in one `UsbControlCommand`.
///
/// It is the length of the fixed-size arrays in `UsbControlCommand::ListDevice` and
/// `UsbControlResult::Devices`.
///
/// This value was set to be equal to `xhci_regs::MAX_PORTS` for convenience, but it is not
/// necessary for correctness. Importing that value directly would be overkill because it would
/// require adding a big dependency for a single const.
pub const USB_CONTROL_MAX_PORTS: usize = 16;
215
216#[derive(Serialize, Deserialize, Debug)]
217pub enum DiskControlCommand {
218    /// Resize a disk to `new_size` in bytes.
219    Resize { new_size: u64 },
220}
221
222impl Display for DiskControlCommand {
223    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
224        use self::DiskControlCommand::*;
225
226        match self {
227            Resize { new_size } => write!(f, "disk_resize {new_size}"),
228        }
229    }
230}
231
/// Outcome of a disk control request: success, or a failure carrying a system error.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum DiskControlResult {
    /// The request succeeded.
    Ok,
    /// The request failed with the contained `SysError`.
    Err(SysError),
}
237
/// Net control commands for adding and removing tap devices.
///
/// Only available when the `pci-hotplug` feature is enabled.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug)]
pub enum NetControlCommand {
    /// Add a tap device.
    /// NOTE(review): the `String` is presumably the tap interface name — confirm.
    AddTap(String),
    /// Remove a tap device.
    /// NOTE(review): the `u8` is presumably the PCI bus reported by
    /// `PciControlResult::AddOk { bus }` — confirm.
    RemoveTap(u8),
}
245
/// Commands for attaching, detaching and listing USB devices.
///
/// `File` payloads are serialized through `with_as_descriptor`, i.e. as descriptors rather than
/// as data.
#[derive(Serialize, Deserialize, Debug)]
pub enum UsbControlCommand {
    /// Attach the device backed by `file`.
    AttachDevice {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Attach the security key backed by `file`.
    AttachSecurityKey {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Detach the device on `port`.
    DetachDevice {
        port: u8,
    },
    /// List devices for the given ports. The array always has `USB_CONTROL_MAX_PORTS` entries.
    /// NOTE(review): how unused entries are encoded is not visible here — confirm with callers.
    ListDevice {
        ports: [u8; USB_CONTROL_MAX_PORTS],
    },
}
263
264#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)]
265pub struct UsbControlAttachedDevice {
266    pub port: u8,
267    pub vendor_id: u16,
268    pub product_id: u16,
269}
270
271impl UsbControlAttachedDevice {
272    pub fn valid(self) -> bool {
273        self.port != 0
274    }
275}
276
/// Result for hotplug and removal of PCI device.
///
/// Only available when the `pci-hotplug` feature is enabled.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
pub enum PciControlResult {
    /// The device was added; `bus` is reported back to the requester.
    AddOk { bus: u8 },
    /// The operation failed; the string describes the error.
    ErrString(String),
    /// The device was removed.
    RemoveOk,
}

#[cfg(feature = "pci-hotplug")]
impl Display for PciControlResult {
    /// Formats the result as `add_ok <bus>`, `error: <message>` or `remove_ok`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::AddOk { bus } => write!(f, "add_ok {bus}"),
            Self::ErrString(message) => write!(f, "error: {message}"),
            Self::RemoveOk => f.write_str("remove_ok"),
        }
    }
}
299
300#[derive(Serialize, Deserialize, Debug, Clone)]
301pub enum UsbControlResult {
302    Ok { port: u8 },
303    NoAvailablePort,
304    NoSuchDevice,
305    NoSuchPort,
306    FailedToOpenDevice,
307    Devices([UsbControlAttachedDevice; USB_CONTROL_MAX_PORTS]),
308    FailedToInitHostDevice,
309}
310
311impl Display for UsbControlResult {
312    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
313        use self::UsbControlResult::*;
314
315        match self {
316            UsbControlResult::Ok { port } => write!(f, "ok {port}"),
317            NoAvailablePort => write!(f, "no_available_port"),
318            NoSuchDevice => write!(f, "no_such_device"),
319            NoSuchPort => write!(f, "no_such_port"),
320            FailedToOpenDevice => write!(f, "failed_to_open_device"),
321            Devices(devices) => {
322                write!(f, "devices")?;
323                for d in devices.iter().filter(|d| d.valid()) {
324                    write!(f, " {} {:04x} {:04x}", d.port, d.vendor_id, d.product_id)?;
325                }
326                std::result::Result::Ok(())
327            }
328            FailedToInitHostDevice => write!(f, "failed_to_init_host_device"),
329        }
330    }
331}
332
/// Commands for snapshot feature
#[derive(Serialize, Deserialize, Debug)]
pub enum SnapshotCommand {
    /// Take a snapshot.
    /// NOTE(review): the field notes below are inferred from the field names — confirm against
    /// the snapshot implementation.
    Take {
        /// Path for the snapshot (presumably where it is written).
        snapshot_path: PathBuf,
        /// Presumably whether guest memory is compressed in the snapshot.
        compress_memory: bool,
        /// Presumably whether the snapshot is encrypted.
        encrypt: bool,
    },
}
342
/// Commands for actions on devices and the devices control thread.
///
/// NOTE(review): the handling code lives in the devices control thread, which is not part of
/// this file; notes on payload-free variants are inferred from their names — confirm there.
#[derive(Serialize, Deserialize, Debug)]
pub enum DeviceControlCommand {
    /// Presumably puts the devices to sleep.
    SleepDevices,
    /// Presumably wakes the devices.
    WakeDevices,
    /// Snapshot the devices using `snapshot_writer`.
    SnapshotDevices { snapshot_writer: SnapshotWriter },
    /// Restore the devices using `snapshot_reader`.
    RestoreDevices { snapshot_reader: SnapshotReader },
    /// Presumably queries the current state of the devices.
    GetDevicesState,
    /// Presumably tells the devices control thread to exit.
    Exit,
}
353
/// Commands to control the IRQ handler thread.
///
/// Responses, where there is one, are `IrqHandlerResponse` values.
#[derive(Serialize, Deserialize)]
pub enum IrqHandlerRequest {
    /// No response is sent for this command.
    AddIrqControlTubes(Vec<Tube>),
    /// Refreshes the set of event tokens (Events) from the Irqchip that the IRQ
    /// handler waits on to forward IRQs to their final destination (e.g. via
    /// Irqchip::service_irq_event).
    ///
    /// If the set of tokens exposed by the Irqchip changes while the VM is
    /// running (such as for snapshot restore), this command must be sent
    /// otherwise the VM will not receive IRQs as expected.
    RefreshIrqEventTokens,
    /// Wakes the handler. Per the `IrqHandlerResponse::HandlerIterationComplete` docs, that
    /// response reports how many tokens the requested iteration serviced, not counting the
    /// token for this request itself.
    WakeAndNotifyIteration,
    /// No response is sent for this command.
    Exit,
}
371
// NOTE(review): no use site is visible in this part of the file. Presumably the number of
// `WakeAndNotifyIteration` rounds a full IRQ flush is expected to stay under — confirm at the
// use site.
const EXPECTED_MAX_IRQ_FLUSH_ITERATIONS: usize = 100;
373
/// Response for [IrqHandlerRequest].
///
/// Some requests are documented as sending no response; see the individual
/// `IrqHandlerRequest` variants.
#[derive(Serialize, Deserialize, Debug)]
pub enum IrqHandlerResponse {
    /// Sent when the IRQ event tokens have been refreshed.
    IrqEventTokenRefreshComplete,
    /// Specifies the number of tokens serviced in the requested iteration
    /// (less the token for the `WakeAndNotifyIteration` request).
    HandlerIterationComplete(usize),
}
383
/// Source of a `VmMemoryRequest::RegisterMemory` mapping.
///
/// Each variant is turned into a host mapping by `VmMemorySource::map`.
#[derive(Serialize, Deserialize)]
pub enum VmMemorySource {
    /// Register shared memory represented by the given descriptor.
    /// On Windows, descriptor MUST be a mapping handle.
    SharedMemory(SharedMemory),
    /// Register a file mapping from the given descriptor.
    Descriptor {
        /// File descriptor to map.
        descriptor: SafeDescriptor,
        /// Offset within the file in bytes.
        offset: u64,
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register memory mapped by Vulkano.
    Vulkan {
        /// Descriptor for the memory; handed to gralloc as the OS handle when mapping.
        descriptor: SafeDescriptor,
        /// Handle type, passed through to the rutabaga handle.
        handle_type: u32,
        /// Memory index, passed through as part of the `VulkanInfo`.
        memory_idx: u32,
        /// Device UUID, used to build the `DeviceId` for the import.
        device_uuid: [u8; 16],
        /// Driver UUID, used to build the `DeviceId` for the import.
        driver_uuid: [u8; 16],
        /// Size passed to gralloc's `import_and_map` and reported as the mapping size.
        size: u64,
    },
    /// Register the current rutabaga external mapping.
    ///
    /// `ptr` and `size` are copied into a `base::ExternalMapping` when mapping.
    /// NOTE(review): `ptr` is presumably a host virtual address and `size` a byte count —
    /// confirm with the producer of this variant.
    ExternalMapping { ptr: u64, size: u64 },
}
411
412// The following are wrappers to avoid base dependencies in the rutabaga crate
413fn to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor {
414    // SAFETY:
415    // Safe because we own the SafeDescriptor at this point.
416    unsafe { RutabagaDescriptor::from_raw_descriptor(s.into_raw_descriptor()) }
417}
418
419struct RutabagaMemoryRegion {
420    region: Box<dyn RutabagaMappedRegion>,
421}
422
423impl RutabagaMemoryRegion {
424    pub fn new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion {
425        RutabagaMemoryRegion { region }
426    }
427}
428
429// SAFETY:
430//
431// Self guarantees `ptr`..`ptr+size` is an mmaped region owned by this object that
432// can't be unmapped during the `MappedRegion`'s lifetime.
433unsafe impl MappedRegion for RutabagaMemoryRegion {
434    fn as_ptr(&self) -> *mut u8 {
435        self.region.as_ptr()
436    }
437
438    fn size(&self) -> usize {
439        self.region.size()
440    }
441}
442
443impl Display for VmMemorySource {
444    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
445        use self::VmMemorySource::*;
446
447        match self {
448            SharedMemory(..) => write!(f, "VmMemorySource::SharedMemory"),
449            Descriptor { .. } => write!(f, "VmMemorySource::Descriptor"),
450            Vulkan { .. } => write!(f, "VmMemorySource::Vulkan"),
451            ExternalMapping { .. } => write!(f, "VmMemorySource::ExternalMapping"),
452        }
453    }
454}
455
456impl VmMemorySource {
457    /// Map the resource and return its mapping and size in bytes.
458    fn map(
459        self,
460        gralloc: &mut RutabagaGralloc,
461        prot: Protection,
462    ) -> anyhow::Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
463        let (mem_region, size, descriptor) = match self {
464            VmMemorySource::Descriptor {
465                descriptor,
466                offset,
467                size,
468            } => (
469                map_descriptor(&descriptor, offset, size, prot)?,
470                size,
471                Some(descriptor),
472            ),
473
474            VmMemorySource::SharedMemory(shm) => {
475                (map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
476            }
477            VmMemorySource::Vulkan {
478                descriptor,
479                handle_type,
480                memory_idx,
481                device_uuid,
482                driver_uuid,
483                size,
484            } => {
485                let device_id = DeviceId {
486                    device_uuid,
487                    driver_uuid,
488                };
489                let mapped_region = gralloc
490                    .import_and_map(
491                        RutabagaHandle {
492                            os_handle: to_rutabaga_desciptor(descriptor),
493                            handle_type,
494                        },
495                        VulkanInfo {
496                            memory_idx,
497                            device_id,
498                        },
499                        size,
500                    )
501                    .with_context(|| {
502                        format!(
503                            "gralloc failed to import and map, handle type: {handle_type}, memory index {memory_idx}, \
504                             size: {size}"
505                        )
506                    })?;
507                let mapped_region: Box<dyn MappedRegion> =
508                    Box::new(RutabagaMemoryRegion::new(mapped_region));
509                (mapped_region, size, None)
510            }
511            VmMemorySource::ExternalMapping { ptr, size } => {
512                let mapped_region: Box<dyn MappedRegion> = Box::new(ExternalMapping {
513                    ptr,
514                    size: size as usize,
515                });
516                (mapped_region, size, None)
517            }
518        };
519        Ok((mem_region, size, descriptor))
520    }
521}
522
523/// Destination of a `VmMemoryRequest::RegisterMemory` mapping in guest address space.
524#[derive(Serialize, Deserialize)]
525pub enum VmMemoryDestination {
526    /// Map at an offset within an existing PCI BAR allocation.
527    ExistingAllocation { allocation: Alloc, offset: u64 },
528    /// Map at the specified guest physical address.
529    GuestPhysicalAddress(u64),
530}
531
532impl VmMemoryDestination {
533    /// Allocate and return the guest address of a memory mapping destination.
534    pub fn allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress> {
535        let addr = match self {
536            VmMemoryDestination::ExistingAllocation { allocation, offset } => allocator
537                .mmio_allocator_any()
538                .address_from_pci_offset(allocation, offset, size)
539                .map_err(|_e| SysError::new(EINVAL))?,
540            VmMemoryDestination::GuestPhysicalAddress(gpa) => gpa,
541        };
542        Ok(GuestAddress(addr))
543    }
544}
545
/// Request to register or unregister an ioevent.
///
/// Carried by `VmMemoryRequest::IoEventRaw`.
#[derive(Serialize, Deserialize)]
pub struct IoEventUpdateRequest {
    /// The event associated with the ioevent.
    pub event: Event,
    /// Address of the ioevent (`IoEventRaw` documents it as a raw guest memory address).
    pub addr: u64,
    /// Data match condition for the ioevent.
    pub datamatch: Datamatch,
    /// NOTE(review): presumably `true` to register and `false` to unregister — confirm in the
    /// `IoEventRaw` handler.
    pub register: bool,
}
554
/// Request to mmap a file to a shared memory.
/// This request is supposed to follow a `VmMemoryRequest::MmapAndRegisterMemory` request that
/// contains `SharedMemory` that `file` is mmaped to.
///
/// The `MmapAndRegisterMemory` handler passes the fields to the mapping arena's
/// `add_fd_mapping` as `(mem_offset, length, &file, file_offset, Protection::read())`.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[derive(Serialize, Deserialize)]
pub struct VmMemoryFileMapping {
    /// File to map; serialized as a descriptor.
    #[serde(with = "with_as_descriptor")]
    pub file: File,
    /// Length of the mapping (presumably in bytes).
    pub length: usize,
    /// Offset within the shared memory mapping at which the file is mapped.
    pub mem_offset: usize,
    /// Offset within `file` at which the mapping starts.
    pub file_offset: u64,
}
567
/// Requests that manipulate guest memory; executed by `VmMemoryRequest::execute`, which
/// answers with a `VmMemoryResponse`.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryRequest {
    /// Prepare a shared memory region to make later operations more efficient. This
    /// may be a no-op depending on underlying platform support.
    PrepareSharedMemoryRegion { alloc: Alloc, cache: MemCacheType },
    /// Register a memory to be mapped to the guest.
    RegisterMemory {
        /// Source of the memory to register (mapped file descriptor, shared memory region, etc.)
        source: VmMemorySource,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Protection to apply to the mapping. When a new memory region is added for this
        /// request, it is registered read-only exactly when this equals `Protection::read()`.
        prot: Protection,
        /// Cache attribute for guest memory setting
        cache: MemCacheType,
    },
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Call mmap to `shm` and register the memory region as a read-only guest memory.
    /// This request is followed by an array of `VmMemoryFileMapping` with length
    /// `num_file_mappings`
    MmapAndRegisterMemory {
        /// Shared memory that the files are mapped into and that is registered with the guest.
        shm: SharedMemory,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Length of the array of `VmMemoryFileMapping` that follows.
        num_file_mappings: usize,
    },
    /// Call hypervisor to free the given memory range.
    /// NOTE(review): each entry is presumably `(start address, length in bytes)` — confirm.
    DynamicallyFreeMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Call hypervisor to reclaim a priorly freed memory range.
    /// NOTE(review): each entry is presumably `(start address, length in bytes)` — confirm.
    DynamicallyReclaimMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Balloon allocation/deallocation target reached.
    BalloonTargetReached { size: u64 },
    /// Unregister the given memory slot that was previously registered with `RegisterMemory`.
    UnregisterMemory(VmMemoryRegionId),
    /// Register an eventfd with raw guest memory address.
    IoEventRaw(IoEventUpdateRequest),
}
607
608/// Struct for managing `VmMemoryRequest`s IOMMU related state.
609pub struct VmMemoryRequestIommuClient {
610    tube: Arc<Mutex<Tube>>,
611    registered_memory: BTreeSet<VmMemoryRegionId>,
612}
613
614impl VmMemoryRequestIommuClient {
615    /// Constructs `VmMemoryRequestIommuClient` from a tube for communication with the viommu.
616    pub fn new(tube: Arc<Mutex<Tube>>) -> Self {
617        Self {
618            tube,
619            registered_memory: BTreeSet::new(),
620        }
621    }
622}
623
/// Bookkeeping entry for memory registered through `VmMemoryRequest::RegisterMemory`, stored
/// in `VmMemoryRegionState::registered_memory`.
enum RegisteredMemory {
    /// An fd mapping added inside a previously prepared region (recorded by
    /// `try_map_to_prepared_region`).
    FixedMapping {
        /// Memory slot of the prepared region.
        slot: MemSlot,
        /// Offset of the mapping within the prepared region.
        offset: usize,
        /// Size of the mapping.
        size: usize,
    },
    /// A memory region added on its own with `add_memory_region` (recorded by the
    /// `RegisterMemory` handler when no prepared region was used).
    DynamicMapping {
        /// Memory slot returned by `add_memory_region`.
        slot: MemSlot,
    },
}
634
/// A prepared memory region, as stored in `VmMemoryRegionState::mapped_regions` by the
/// `PrepareSharedMemoryRegion` handler.
pub struct VmMappedMemoryRegion {
    /// Guest address of the start of the region; fixed mappings are placed at an offset from it.
    guest_address: GuestAddress,
    /// Memory slot of the region, used when adding fd mappings into it.
    slot: MemSlot,
}
639
/// State that `VmMemoryRequest::execute` keeps across requests.
///
/// `Default` yields empty maps.
#[derive(Default)]
pub struct VmMemoryRegionState {
    /// Prepared regions, keyed by the allocation they were prepared for.
    mapped_regions: HashMap<Alloc, VmMappedMemoryRegion>,
    /// Memory registered so far, keyed by region id.
    registered_memory: BTreeMap<VmMemoryRegionId, RegisteredMemory>,
}
645
646fn try_map_to_prepared_region(
647    vm: &mut impl Vm,
648    region_state: &mut VmMemoryRegionState,
649    source: &VmMemorySource,
650    dest: &VmMemoryDestination,
651    prot: &Protection,
652) -> Option<VmMemoryResponse> {
653    let VmMemoryDestination::ExistingAllocation {
654        allocation,
655        offset: dest_offset,
656    } = dest
657    else {
658        return None;
659    };
660
661    let VmMappedMemoryRegion {
662        guest_address,
663        slot,
664    } = region_state.mapped_regions.get(allocation)?;
665
666    let (descriptor, file_offset, size) = match source {
667        VmMemorySource::Descriptor {
668            descriptor,
669            offset,
670            size,
671        } => (
672            Descriptor(descriptor.as_raw_descriptor()),
673            *offset,
674            *size as usize,
675        ),
676        VmMemorySource::SharedMemory(shm) => {
677            let size = shm.size() as usize;
678            (Descriptor(shm.as_raw_descriptor()), 0, size)
679        }
680        _ => {
681            let error = anyhow::anyhow!(
682                "source {} is not compatible with fixed mapping into prepared memory region",
683                source
684            );
685            return Some(VmMemoryResponse::Err(error.into()));
686        }
687    };
688    if let Err(err) = vm
689        .add_fd_mapping(
690            *slot,
691            *dest_offset as usize,
692            size,
693            &descriptor,
694            file_offset,
695            *prot,
696        )
697        .context("failed to add fd mapping when trying to map to prepared region")
698    {
699        return Some(VmMemoryResponse::Err(err.into()));
700    }
701
702    let guest_address = GuestAddress(guest_address.0 + dest_offset);
703    let region_id = VmMemoryRegionId(guest_address);
704    region_state.registered_memory.insert(
705        region_id,
706        RegisteredMemory::FixedMapping {
707            slot: *slot,
708            offset: *dest_offset as usize,
709            size,
710        },
711    );
712
713    Some(VmMemoryResponse::RegisterMemory {
714        region_id,
715        slot: *slot,
716    })
717}
718
719impl VmMemoryRequest {
720    /// Executes this request on the given Vm.
721    ///
722    /// # Arguments
723    /// * `vm` - The `Vm` to perform the request on.
724    /// * `allocator` - Used to allocate addresses.
725    ///
726    /// This does not return a result, instead encapsulating the success or failure in a
727    /// `VmMemoryResponse` with the intended purpose of sending the response back over the socket
    /// that received this `VmMemoryRequest`.
729    pub fn execute(
730        self,
731        #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube,
732        vm: &mut impl Vm,
733        sys_allocator: &mut SystemAllocator,
734        gralloc: &mut RutabagaGralloc,
735        iommu_client: Option<&mut VmMemoryRequestIommuClient>,
736        region_state: &mut VmMemoryRegionState,
737    ) -> VmMemoryResponse {
738        use self::VmMemoryRequest::*;
739        match self {
740            PrepareSharedMemoryRegion { alloc, cache } => {
741                // Currently the iommu_client is only used by virtio-gpu when used alongside GPU
742                // pci-passthrough.
743                //
744                // TODO(b/323368701): Make compatible with iommu_client by ensuring that
745                // VirtioIOMMUVfioCommand::VfioDmabufMap is submitted for both dynamic mappings and
746                // fixed mappings (i.e. whether or not try_map_to_prepared_region succeeds in
747                // RegisterMemory case below).
748                assert!(iommu_client.is_none());
749
750                if !sys::should_prepare_memory_region() {
751                    return VmMemoryResponse::Ok;
752                }
753
754                match sys::prepare_shared_memory_region(vm, sys_allocator, alloc, cache)
755                    .context("failed to prepare shared memory region")
756                {
757                    Ok(region) => {
758                        region_state.mapped_regions.insert(alloc, region);
759                        VmMemoryResponse::Ok
760                    }
761                    Err(e) => VmMemoryResponse::Err(e.into()),
762                }
763            }
764            RegisterMemory {
765                source,
766                dest,
767                prot,
768                cache,
769            } => {
770                if let Some(resp) =
771                    try_map_to_prepared_region(vm, region_state, &source, &dest, &prot)
772                {
773                    return resp;
774                }
775
776                // Correct on Windows because callers of this IPC guarantee descriptor is a mapping
777                // handle.
778                let (mapped_region, size, descriptor) =
779                    match source.map(gralloc, prot).context("gralloc mapping") {
780                        Ok((region, size, descriptor)) => (region, size, descriptor),
781                        Err(e) => return VmMemoryResponse::Err(e.into()),
782                    };
783
784                let guest_addr = match dest
785                    .allocate(sys_allocator, size)
786                    .context("VM memory destination allocation fails")
787                {
788                    Ok(addr) => addr,
789                    Err(e) => return VmMemoryResponse::Err(e.into()),
790                };
791
792                let slot = match vm
793                    .add_memory_region(
794                        guest_addr,
795                        mapped_region,
796                        prot == Protection::read(),
797                        false,
798                        cache,
799                    )
800                    .context("failed to add memory region when registering memory")
801                {
802                    Ok(slot) => slot,
803                    Err(e) => return VmMemoryResponse::Err(e.into()),
804                };
805
806                let region_id = VmMemoryRegionId(guest_addr);
807                if let (Some(descriptor), Some(iommu_client)) = (descriptor, iommu_client) {
808                    let request =
809                        VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
810                            region_id,
811                            gpa: guest_addr.0,
812                            size,
813                            dma_buf: descriptor,
814                        });
815
816                    match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
817                        Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
818                        resp => {
819                            let error = anyhow::anyhow!(
820                                "Unexpected virtio-iommu message response when registering memory: \
821                                 {:?}", resp);
822                            if let Err(e) = vm.remove_memory_region(slot) {
823                                // There is nothing we can do here, so we just log a warning
824                                // message.
825                                warn!("failed to remove memory region: {:?}", e);
826                            }
827                            return VmMemoryResponse::Err(error.into());
828                        }
829                    };
830
831                    iommu_client.registered_memory.insert(region_id);
832                }
833
834                region_state
835                    .registered_memory
836                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
837                VmMemoryResponse::RegisterMemory { region_id, slot }
838            }
839            #[cfg(any(target_os = "android", target_os = "linux"))]
840            MmapAndRegisterMemory {
841                shm,
842                dest,
843                num_file_mappings,
844            } => {
845                // Define a callback to be executed with extended limit of file counts.
                // It receives `num_file_mappings` FDs and calls `add_fd_mapping` for each.
847                let callback = || {
848                    let mem = match MemoryMappingBuilder::new(shm.size() as usize)
849                        .from_shared_memory(&shm)
850                        .build()
851                        .context("failed to build MemoryMapping from shared memory")
852                    {
853                        Ok(mem) => mem,
854                        Err(e) => return Err(VmMemoryResponse::Err(e.into())),
855                    };
856                    let mut mmap_arena = MemoryMappingArena::from(mem);
857
858                    // If `num_file_mappings` exceeds `SCM_MAX_FD`, `file_mappings` are sent in
859                    // chunks of length `SCM_MAX_FD`.
860                    let mut file_mappings = Vec::with_capacity(num_file_mappings);
861                    let mut read = 0;
862                    while read < num_file_mappings {
863                        let len = std::cmp::min(num_file_mappings - read, base::unix::SCM_MAX_FD);
864                        let mps: Vec<VmMemoryFileMapping> = match tube
865                            .recv_with_max_fds(len)
866                            .with_context(|| format!("get {num_file_mappings} FDs to be mapped"))
867                        {
868                            Ok(m) => m,
869                            Err(e) => return Err(VmMemoryResponse::Err(e.into())),
870                        };
871                        file_mappings.extend(mps.into_iter());
872                        read += len;
873                    }
874
875                    for VmMemoryFileMapping {
876                        mem_offset,
877                        length,
878                        file,
879                        file_offset,
880                    } in file_mappings
881                    {
882                        if let Err(e) = mmap_arena
883                            .add_fd_mapping(
884                                mem_offset,
885                                length,
886                                &file,
887                                file_offset,
888                                Protection::read(),
889                            )
890                            .context(
891                                "failed to add fd mapping when handling mmap and register memory",
892                            )
893                        {
894                            return Err(VmMemoryResponse::Err(e.into()));
895                        }
896                    }
897                    Ok(mmap_arena)
898                };
899                let mmap_arena = match call_with_extended_max_files(callback)
900                    .context("failed to set max count of file descriptors")
901                {
902                    Ok(Ok(m)) => m,
903                    Ok(Err(e)) => {
904                        return e;
905                    }
906                    Err(e) => {
907                        error!("{e:?}");
908                        return VmMemoryResponse::Err(e.into());
909                    }
910                };
911
912                let size = shm.size();
913                let guest_addr = match dest.allocate(sys_allocator, size).context(
914                    "VM memory destination allocation fails when handling mmap and register memory",
915                ) {
916                    Ok(addr) => addr,
917                    Err(e) => return VmMemoryResponse::Err(e.into()),
918                };
919
920                let slot = match vm
921                    .add_memory_region(
922                        guest_addr,
923                        Box::new(mmap_arena),
924                        true,
925                        false,
926                        MemCacheType::CacheCoherent,
927                    )
928                    .context("failed to add memory region when handling mmap and register memory")
929                {
930                    Ok(slot) => slot,
931                    Err(e) => return VmMemoryResponse::Err(e.into()),
932                };
933
934                let region_id = VmMemoryRegionId(guest_addr);
935
936                region_state
937                    .registered_memory
938                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
939
940                VmMemoryResponse::RegisterMemory { region_id, slot }
941            }
942            UnregisterMemory(id) => match region_state.registered_memory.remove(&id) {
943                Some(RegisteredMemory::DynamicMapping { slot }) => match vm
944                    .remove_memory_region(slot)
945                    .context(
946                        "failed to remove memory region when unregistering dynamic mapping memory",
947                    ) {
948                    Ok(_) => {
949                        if let Some(iommu_client) = iommu_client {
950                            if iommu_client.registered_memory.remove(&id) {
951                                let request = VirtioIOMMURequest::VfioCommand(
952                                    VirtioIOMMUVfioCommand::VfioDmabufUnmap(id),
953                                );
954
955                                match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
956                                    Ok(VirtioIOMMUResponse::VfioResponse(
957                                        VirtioIOMMUVfioResult::Ok,
958                                    )) => VmMemoryResponse::Ok,
959                                    resp => {
960                                        let error = anyhow::anyhow!(
961                                            "Unexpected virtio-iommu message response when \
962                                             unregistering memory: {:?}",
963                                            resp
964                                        );
965                                        VmMemoryResponse::Err(error.into())
966                                    }
967                                }
968                            } else {
969                                VmMemoryResponse::Ok
970                            }
971                        } else {
972                            VmMemoryResponse::Ok
973                        }
974                    }
975                    Err(e) => VmMemoryResponse::Err(e.into()),
976                },
977                Some(RegisteredMemory::FixedMapping { slot, offset, size }) => {
978                    match vm.remove_mapping(slot, offset, size).context(
979                        "failed to remove memory mapping when unregistering fixed mapping memory",
980                    ) {
981                        Ok(()) => VmMemoryResponse::Ok,
982                        Err(e) => VmMemoryResponse::Err(e.into()),
983                    }
984                }
985                None => {
986                    let error =
987                        anyhow::anyhow!("can't find the memory region when unregistering memory");
988                    VmMemoryResponse::Err(error.into())
989                }
990            },
991            DynamicallyFreeMemoryRanges { ranges } => {
992                let mut r = VmMemoryResponse::Ok;
993                for (guest_address, size) in ranges {
994                    match vm
995                        .handle_balloon_event(BalloonEvent::Inflate(MemRegion {
996                            guest_address,
997                            size,
998                        }))
999                        .context(
1000                            "failed to handle the inflate balloon event when freeing memory ranges \
1001                             dynamically",
1002                        ) {
1003                        Ok(_) => {}
1004                        Err(e) => {
1005                            error!("{:?}", e);
1006                            r = VmMemoryResponse::Err(e.into());
1007                            break;
1008                        }
1009                    }
1010                }
1011                r
1012            }
1013            DynamicallyReclaimMemoryRanges { ranges } => {
1014                let mut r = VmMemoryResponse::Ok;
1015                for (guest_address, size) in ranges {
1016                    match vm
1017                        .handle_balloon_event(BalloonEvent::Deflate(MemRegion {
1018                            guest_address,
1019                            size,
1020                        }))
1021                        .context(
1022                            "failed to handle the deflate balloon event when reclaiming memory \
1023                             ranges dynamically",
1024                        ) {
1025                        Ok(_) => {}
1026                        Err(e) => {
1027                            error!("{:?}", e);
1028                            r = VmMemoryResponse::Err(e.into());
1029                            break;
1030                        }
1031                    }
1032                }
1033                r
1034            }
1035            BalloonTargetReached { size } => {
1036                match vm
1037                    .handle_balloon_event(BalloonEvent::BalloonTargetReached(size))
1038                    .context("failed to handle the target reached balloon event")
1039                {
1040                    Ok(_) => VmMemoryResponse::Ok,
1041                    Err(e) => VmMemoryResponse::Err(e.into()),
1042                }
1043            }
1044            IoEventRaw(request) => {
1045                let res = if request.register {
1046                    vm.register_ioevent(
1047                        &request.event,
1048                        IoEventAddress::Mmio(request.addr),
1049                        request.datamatch,
1050                    )
1051                    .context("failed to register IO event")
1052                } else {
1053                    vm.unregister_ioevent(
1054                        &request.event,
1055                        IoEventAddress::Mmio(request.addr),
1056                        request.datamatch,
1057                    )
1058                    .context("failed to unregister IO event")
1059                };
1060                match res {
1061                    Ok(_) => VmMemoryResponse::Ok,
1062                    Err(e) => VmMemoryResponse::Err(e.into()),
1063                }
1064            }
1065        }
1066    }
1067}
1068
#[derive(Serialize, Deserialize, Debug, PartialOrd, PartialEq, Eq, Ord, Clone, Copy)]
/// Identifier for registered memory regions. Globally unique.
// The current implementation uses the guest physical address as the unique identifier.
pub struct VmMemoryRegionId(pub GuestAddress);
1073
/// Result of handling a VM memory request (registering or unregistering guest memory, balloon
/// events, ioevent registration).
#[derive(Serialize, Deserialize, Debug)]
pub enum VmMemoryResponse {
    /// The request to register memory into guest address space was successful.
    RegisterMemory {
        /// Identifier under which the region was recorded; it is the key used when the region
        /// is unregistered later.
        region_id: VmMemoryRegionId,
        /// Memory slot returned by the VM when the region was added.
        slot: u32,
    },
    /// The request succeeded and carries no payload.
    Ok,
    /// The request failed; the payload carries the error together with its context chain.
    Err(VmMemoryResponseError),
}
1084
1085impl<T> From<Result<T>> for VmMemoryResponse {
1086    fn from(r: Result<T>) -> Self {
1087        match r {
1088            Ok(_) => VmMemoryResponse::Ok,
1089            Err(e) => VmMemoryResponse::Err(anyhow::Error::new(e).into()),
1090        }
1091    }
1092}
1093
/// Error payload of `VmMemoryResponse::Err`, wrapping an `anyhow::Error`.
///
/// `Serialize` and `Deserialize` are implemented by hand through `FlatVmMemoryResponseError`,
/// so the error crosses process boundaries as the list of messages of its chain.
#[derive(Debug, thiserror::Error)]
#[error("Vm memory response error: {0}")]
pub struct VmMemoryResponseError(#[from] pub anyhow::Error);
1097
1098impl TryFrom<FlatVmMemoryResponseError> for VmMemoryResponseError {
1099    type Error = anyhow::Error;
1100    fn try_from(value: FlatVmMemoryResponseError) -> StdResult<Self, Self::Error> {
1101        let inner = value
1102            .0
1103            .into_iter()
1104            .fold(
1105                None,
1106                |error: Option<anyhow::Error>, current_context| match error {
1107                    Some(error) => Some(error.context(current_context)),
1108                    None => Some(anyhow::Error::msg(current_context)),
1109                },
1110            )
1111            .context("should carry at least one error")?;
1112        Ok(Self(inner))
1113    }
1114}
1115
1116impl Serialize for VmMemoryResponseError {
1117    fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error>
1118    where
1119        S: serde::Serializer,
1120    {
1121        let flat: FlatVmMemoryResponseError = self.into();
1122        flat.serialize(serializer)
1123    }
1124}
1125
1126impl<'de> Deserialize<'de> for VmMemoryResponseError {
1127    fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
1128    where
1129        D: serde::Deserializer<'de>,
1130    {
1131        let flat = FlatVmMemoryResponseError::deserialize(deserializer)?;
1132        flat.try_into()
1133            .map_err(|e: anyhow::Error| D::Error::custom(e.to_string()))
1134    }
1135}
1136
/// Serializable form of `VmMemoryResponseError`: the messages of the error chain, stored from
/// the innermost (root) cause to the outermost context.
#[derive(Debug, Serialize, Deserialize)]
struct FlatVmMemoryResponseError(Vec<String>);
1139
1140impl From<&VmMemoryResponseError> for FlatVmMemoryResponseError {
1141    fn from(value: &VmMemoryResponseError) -> Self {
1142        let contexts = value
1143            .0
1144            .chain()
1145            .map(ToString::to_string)
1146            .rev()
1147            .collect::<Vec<_>>();
1148        Self(contexts)
1149    }
1150}
1151
/// Requests for allocating, routing and releasing interrupts (GSIs).
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqRequest {
    /// Allocate one gsi, and associate gsi to irqfd with register_irqfd()
    ///
    /// On success the allocated gsi is returned in `VmIrqResponse::AllocateOneMsi`.
    AllocateOneMsi {
        irqfd: Event,
        device_id: u32,
        queue_id: usize,
        device_name: String,
    },
    /// Allocate a specific gsi to irqfd with register_irqfd(). This must only
    /// be used when it is known that the gsi is free. Only the snapshot
    /// subsystem can make this guarantee, and use of this request by any other
    /// caller is strongly discouraged.
    AllocateOneMsiAtGsi {
        irqfd: Event,
        gsi: u32,
        device_id: u32,
        queue_id: usize,
        device_name: String,
    },
    /// Add one msi route entry into the IRQ chip.
    AddMsiRoute {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
        #[cfg(target_arch = "aarch64")]
        pci_address: resources::PciAddress,
    },
    /// Unregister `irqfd` from `gsi` and release the gsi back to the allocator.
    ReleaseOneIrq {
        gsi: u32,
        irqfd: Event,
    },
}
1186
/// Data to set up an IRQ event or IRQ route on the IRQ chip.
/// VmIrqRequest::execute can't take an `IrqChip` argument, because of a dependency cycle between
/// devices and vm_control, so it takes a Fn that processes an `IrqSetup`.
pub enum IrqSetup<'a> {
    /// Register an IRQ event; the fields are `(gsi, irqfd, device_id, queue_id, device_name)`.
    Event(u32, &'a Event, u32, usize, String),
    /// Add the given route.
    Route(IrqRoute),
    /// Unregister an IRQ event; the fields are `(gsi, irqfd)`.
    UnRegister(u32, &'a Event),
}
1195
1196impl VmIrqRequest {
1197    /// Executes this request on the given Vm.
1198    ///
1199    /// # Arguments
1200    /// * `set_up_irq` - A function that applies an `IrqSetup` to an IRQ chip.
1201    ///
1202    /// This does not return a result, instead encapsulating the success or failure in a
1203    /// `VmIrqResponse` with the intended purpose of sending the response back over the socket
1204    /// that received this `VmIrqResponse`.
1205    pub fn execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse
1206    where
1207        F: FnOnce(IrqSetup) -> Result<()>,
1208    {
1209        use self::VmIrqRequest::*;
1210        match *self {
1211            AllocateOneMsi {
1212                ref irqfd,
1213                device_id,
1214                queue_id,
1215                ref device_name,
1216            } => {
1217                if let Some(irq_num) = sys_allocator.allocate_irq() {
1218                    match set_up_irq(IrqSetup::Event(
1219                        irq_num,
1220                        irqfd,
1221                        device_id,
1222                        queue_id,
1223                        device_name.clone(),
1224                    )) {
1225                        Ok(_) => VmIrqResponse::AllocateOneMsi { gsi: irq_num },
1226                        Err(e) => VmIrqResponse::Err(e),
1227                    }
1228                } else {
1229                    VmIrqResponse::Err(SysError::new(EINVAL))
1230                }
1231            }
1232            AllocateOneMsiAtGsi {
1233                ref irqfd,
1234                gsi,
1235                device_id,
1236                queue_id,
1237                ref device_name,
1238            } => {
1239                match set_up_irq(IrqSetup::Event(
1240                    gsi,
1241                    irqfd,
1242                    device_id,
1243                    queue_id,
1244                    device_name.clone(),
1245                )) {
1246                    Ok(_) => VmIrqResponse::Ok,
1247                    Err(e) => VmIrqResponse::Err(e),
1248                }
1249            }
1250            AddMsiRoute {
1251                gsi,
1252                msi_address,
1253                msi_data,
1254                #[cfg(target_arch = "aarch64")]
1255                pci_address,
1256            } => {
1257                let route = IrqRoute {
1258                    gsi,
1259                    source: IrqSource::Msi {
1260                        address: msi_address,
1261                        data: msi_data,
1262                        #[cfg(target_arch = "aarch64")]
1263                        pci_address,
1264                    },
1265                };
1266                match set_up_irq(IrqSetup::Route(route)) {
1267                    Ok(_) => VmIrqResponse::Ok,
1268                    Err(e) => VmIrqResponse::Err(e),
1269                }
1270            }
1271            ReleaseOneIrq { gsi, ref irqfd } => {
1272                let _ = set_up_irq(IrqSetup::UnRegister(gsi, irqfd));
1273                sys_allocator.release_irq(gsi);
1274                VmIrqResponse::Ok
1275            }
1276        }
1277    }
1278}
1279
/// Reply to a `VmIrqRequest`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqResponse {
    /// An interrupt was allocated and set up; `gsi` is the allocated number.
    AllocateOneMsi { gsi: u32 },
    /// The request succeeded and carries no payload.
    Ok,
    /// The request failed with the given system error.
    Err(SysError),
}
1286
/// Sleep/wake state selector for devices.
// NOTE(review): where this is consumed is not visible in this part of the file; presumably it
// is used when suspending and resuming devices — confirm.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum DevicesState {
    Sleep,
    Wake,
}
1292
/// Outcome of a battery control request; also used as the parse error of the battery
/// property, status, health and type strings.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum BatControlResult {
    /// The battery property was set successfully.
    Ok,
    /// No battery device has been created.
    NoBatDevice,
    /// The given health value is not one of the supported health settings.
    NoSuchHealth,
    /// The given property name is not a supported battery property.
    NoSuchProperty,
    /// The given status value is not one of the supported status settings.
    NoSuchStatus,
    /// The given battery type is not a supported type.
    NoSuchBatType,
    /// The target value could not be parsed as an integer.
    StringParseIntErr,
    /// The target value could not be parsed as a boolean.
    StringParseBoolErr,
}
1304
1305impl Display for BatControlResult {
1306    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1307        use self::BatControlResult::*;
1308
1309        match self {
1310            Ok => write!(f, "Setting battery property successfully"),
1311            NoBatDevice => write!(f, "No battery device created"),
1312            NoSuchHealth => write!(f, "Invalid Battery health setting. Only support: unknown/good/overheat/dead/overvoltage/unexpectedfailure/cold/watchdogtimerexpire/safetytimerexpire/overcurrent"),
1313            NoSuchProperty => write!(f, "Battery doesn't have such property. Only support: status/health/present/capacity/aconline"),
1314            NoSuchStatus => write!(f, "Invalid Battery status setting. Only support: unknown/charging/discharging/notcharging/full"),
1315            NoSuchBatType => write!(f, "Invalid Battery type setting. Only support: goldfish"),
1316            StringParseIntErr => write!(f, "Battery property target ParseInt error"),
1317            StringParseBoolErr => write!(f, "Battery property target ParseBool error"),
1318        }
1319    }
1320}
1321
/// Kind of emulated battery device. Serialized in kebab-case; `Goldfish` is the default.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum BatteryType {
    #[default]
    Goldfish,
}
1328
1329impl FromStr for BatteryType {
1330    type Err = BatControlResult;
1331
1332    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1333        match s {
1334            "goldfish" => Ok(BatteryType::Goldfish),
1335            _ => Err(BatControlResult::NoSuchBatType),
1336        }
1337    }
1338}
1339
/// Battery property names accepted by the battery control command.
///
/// The textual form of each variant is given by the `FromStr` and `Display` impls
/// (`status`, `health`, `present`, `capacity`, `aconline`, `set_fake_bat_config`,
/// `cancel_fake_bat_config`).
#[derive(Serialize, Deserialize, Debug)]
pub enum BatProperty {
    Status,
    Health,
    Present,
    Capacity,
    ACOnline,
    SetFakeBatConfig,
    CancelFakeBatConfig,
}
1350
1351impl FromStr for BatProperty {
1352    type Err = BatControlResult;
1353
1354    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1355        match s {
1356            "status" => Ok(BatProperty::Status),
1357            "health" => Ok(BatProperty::Health),
1358            "present" => Ok(BatProperty::Present),
1359            "capacity" => Ok(BatProperty::Capacity),
1360            "aconline" => Ok(BatProperty::ACOnline),
1361            "set_fake_bat_config" => Ok(BatProperty::SetFakeBatConfig),
1362            "cancel_fake_bat_config" => Ok(BatProperty::CancelFakeBatConfig),
1363            _ => Err(BatControlResult::NoSuchProperty),
1364        }
1365    }
1366}
1367
1368impl Display for BatProperty {
1369    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1370        match *self {
1371            BatProperty::Status => write!(f, "status"),
1372            BatProperty::Health => write!(f, "health"),
1373            BatProperty::Present => write!(f, "present"),
1374            BatProperty::Capacity => write!(f, "capacity"),
1375            BatProperty::ACOnline => write!(f, "aconline"),
1376            BatProperty::SetFakeBatConfig => write!(f, "set_fake_bat_config"),
1377            BatProperty::CancelFakeBatConfig => write!(f, "cancel_fake_bat_config"),
1378        }
1379    }
1380}
1381
/// Battery charging status.
///
/// The declaration order matters: converting a value to `u32` casts the variant, so
/// `Unknown` is 0, `Charging` is 1, and so on.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatStatus {
    Unknown,
    Charging,
    DisCharging,
    NotCharging,
    Full,
}
1390
1391impl BatStatus {
1392    pub fn new(status: String) -> std::result::Result<Self, BatControlResult> {
1393        match status.as_str() {
1394            "unknown" => Ok(BatStatus::Unknown),
1395            "charging" => Ok(BatStatus::Charging),
1396            "discharging" => Ok(BatStatus::DisCharging),
1397            "notcharging" => Ok(BatStatus::NotCharging),
1398            "full" => Ok(BatStatus::Full),
1399            _ => Err(BatControlResult::NoSuchStatus),
1400        }
1401    }
1402}
1403
1404impl FromStr for BatStatus {
1405    type Err = BatControlResult;
1406
1407    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1408        match s {
1409            "unknown" => Ok(BatStatus::Unknown),
1410            "charging" => Ok(BatStatus::Charging),
1411            "discharging" => Ok(BatStatus::DisCharging),
1412            "notcharging" => Ok(BatStatus::NotCharging),
1413            "full" => Ok(BatStatus::Full),
1414            _ => Err(BatControlResult::NoSuchStatus),
1415        }
1416    }
1417}
1418
impl From<BatStatus> for u32 {
    /// Converts the status to its numeric value, i.e. the variant's discriminant.
    fn from(status: BatStatus) -> Self {
        // A plain cast keeps the number in lockstep with the enum's declaration order.
        status as u32
    }
}
1424
/// Battery health state.
///
/// The declaration order matters: converting a value to `u32` casts the variant, so
/// `Unknown` is 0, `Good` is 1, and so on.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatHealth {
    Unknown,
    Good,
    Overheat,
    Dead,
    OverVoltage,
    UnexpectedFailure,
    Cold,
    WatchdogTimerExpire,
    SafetyTimerExpire,
    OverCurrent,
}
1438
1439impl FromStr for BatHealth {
1440    type Err = BatControlResult;
1441
1442    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1443        match s {
1444            "unknown" => Ok(BatHealth::Unknown),
1445            "good" => Ok(BatHealth::Good),
1446            "overheat" => Ok(BatHealth::Overheat),
1447            "dead" => Ok(BatHealth::Dead),
1448            "overvoltage" => Ok(BatHealth::OverVoltage),
1449            "unexpectedfailure" => Ok(BatHealth::UnexpectedFailure),
1450            "cold" => Ok(BatHealth::Cold),
1451            "watchdogtimerexpire" => Ok(BatHealth::WatchdogTimerExpire),
1452            "safetytimerexpire" => Ok(BatHealth::SafetyTimerExpire),
1453            "overcurrent" => Ok(BatHealth::OverCurrent),
1454            _ => Err(BatControlResult::NoSuchHealth),
1455        }
1456    }
1457}
1458
1459impl From<BatHealth> for u32 {
1460    fn from(status: BatHealth) -> Self {
1461        status as u32
1462    }
1463}
1464
/// A fully parsed battery control command: the property to change together with its new value.
///
/// Built from a `(property, target)` string pair by `BatControlCommand::new`.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatControlCommand {
    /// Set the charging status.
    SetStatus(BatStatus),
    /// Set the health state.
    SetHealth(BatHealth),
    /// Set the `present` property to the given integer.
    SetPresent(u32),
    /// Set the `capacity` property to the given integer.
    SetCapacity(u32),
    /// Set the `aconline` property to the given integer.
    SetACOnline(u32),
    /// Install a fake battery configuration carrying the given integer.
    // NOTE(review): the meaning of the value is not visible here — confirm against the device.
    SetFakeBatConfig(u32),
    /// Cancel a fake battery configuration; takes no value.
    CancelFakeConfig,
}
1475
1476impl BatControlCommand {
1477    pub fn new(property: String, target: String) -> std::result::Result<Self, BatControlResult> {
1478        let cmd = property.parse::<BatProperty>()?;
1479        match cmd {
1480            BatProperty::Status => Ok(BatControlCommand::SetStatus(target.parse::<BatStatus>()?)),
1481            BatProperty::Health => Ok(BatControlCommand::SetHealth(target.parse::<BatHealth>()?)),
1482            BatProperty::Present => Ok(BatControlCommand::SetPresent(
1483                target
1484                    .parse::<u32>()
1485                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1486            )),
1487            BatProperty::Capacity => Ok(BatControlCommand::SetCapacity(
1488                target
1489                    .parse::<u32>()
1490                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1491            )),
1492            BatProperty::ACOnline => Ok(BatControlCommand::SetACOnline(
1493                target
1494                    .parse::<u32>()
1495                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1496            )),
1497            BatProperty::SetFakeBatConfig => Ok(BatControlCommand::SetFakeBatConfig(
1498                target
1499                    .parse::<u32>()
1500                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1501            )),
1502            BatProperty::CancelFakeBatConfig => Ok(BatControlCommand::CancelFakeConfig),
1503        }
1504    }
1505}
1506
/// Used for VM to control battery properties.
pub struct BatControl {
    /// Kind of battery device being controlled.
    pub type_: BatteryType,
    /// Tube used to talk to the battery device.
    // NOTE(review): direction and message types are not visible here — confirm.
    pub control_tube: Tube,
}
1512
/// Used for VM to control virtio-snd devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum SndControlCommand {
    /// Mute-all command carrying a flag.
    // NOTE(review): presumably `true` mutes and `false` unmutes — confirm.
    MuteAll(bool),
}
1518
/// Used to mark a hotplug PCI device's device type.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum HotPlugDeviceType {
    UpstreamPort,
    DownstreamPort,
    EndPoint,
}
1526
/// Used for VM to hotplug PCI devices.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HotPlugDeviceInfo {
    /// Type of the device being hotplugged.
    pub device_type: HotPlugDeviceType,
    /// Path identifying the device.
    // NOTE(review): presumably the host path of the device (e.g. a sysfs path) — confirm.
    pub path: PathBuf,
    /// Hotplug-interrupt flag.
    // NOTE(review): presumably whether a hotplug interrupt is raised for this device — confirm.
    pub hp_interrupt: bool,
}
1534
/// Message for communicating a suspend or resume to the virtio-pvclock device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum PvClockCommand {
    /// Communicates a suspend to the device.
    Suspend,
    /// Communicates a resume to the device.
    Resume,
}
1541
/// Message used by virtio-pvclock to communicate command results.
#[derive(Serialize, Deserialize, Debug)]
pub enum PvClockCommandResponse {
    /// The command succeeded and carries no payload.
    Ok,
    /// Result carrying a `total_suspended_ticks` count.
    // NOTE(review): presumably the reply to `PvClockCommand::Resume`; the tick unit is not
    // visible here — confirm.
    Resumed { total_suspended_ticks: u64 },
    // NOTE(review): presumably returned when the device has not been activated — confirm.
    DeviceInactive,
    /// The command failed with the given system error.
    Err(SysError),
}
1550
/// Commands for the vmm-swap feature.
// NOTE(review): the per-variant semantics are implemented outside this part of the file and
// are therefore not described here.
#[derive(Serialize, Deserialize, Debug)]
pub enum SwapCommand {
    Enable,
    Trim,
    SwapOut,
    Disable { slow_file_cleanup: bool },
    Status,
}
1560
/// A request to the main process to perform some operation on the VM.
///
/// Unless otherwise noted, each request should expect a `VmResponse::Ok` to be received on success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmRequest {
    /// Break the VM's run loop and exit.
    Exit,
    /// Trigger a power button event in the guest.
    Powerbtn,
    /// Trigger a sleep button event in the guest.
    Sleepbtn,
    /// Trigger a RTC interrupt in the guest. When the irq associated with the RTC is
    /// resampled, it will be re-asserted as long as `clear_evt` is not signaled.
    Rtc { clear_evt: Event },
    /// Suspend the VM's VCPUs until resume.
    SuspendVcpus,
    /// Swap the memory content into files on a disk
    Swap(SwapCommand),
    /// Resume the VM's VCPUs that were previously suspended.
    ResumeVcpus,
    /// Inject a general-purpose event. If `clear_evt` is provided, when the irq associated
    /// with the GPE is resampled, it will be re-asserted as long as `clear_evt` is not
    /// signaled.
    Gpe { gpe: u32, clear_evt: Option<Event> },
    /// Inject a PCI PME
    PciPme(u16),
    /// Make the VM's RT VCPU real-time.
    MakeRT,
    /// Command for balloon driver.
    #[cfg(feature = "balloon")]
    BalloonCommand(BalloonControlCommand),
    /// Send a command to a disk chosen by `disk_index`.
    /// `disk_index` is a 0-based count of `--disk`, `--rwdisk`, and `-r` command-line options.
    DiskCommand {
        disk_index: usize,
        command: DiskControlCommand,
    },
    /// Command to use controller.
    UsbCommand(UsbControlCommand),
    /// Command to modify the gpu.
    #[cfg(feature = "gpu")]
    GpuCommand(GpuControlCommand),
    /// Command to set battery.
    BatCommand(BatteryType, BatControlCommand),
    /// Command to control snd devices
    #[cfg(feature = "audio")]
    SndCommand(SndControlCommand),
    /// Command to add/remove multiple vfio-pci devices
    // NOTE(review): presumably `add == true` plugs the device and `false` removes it — confirm.
    HotPlugVfioCommand {
        device: HotPlugDeviceInfo,
        add: bool,
    },
    /// Command to add/remove network tap device as virtio-pci device
    #[cfg(feature = "pci-hotplug")]
    HotPlugNetCommand(NetControlCommand),
    /// Command to Snapshot devices
    Snapshot(SnapshotCommand),
    /// Register for event notification
    RegisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for notifications for event
    UnregisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for all event notification
    Unregister { socket_addr: String },
    /// Suspend VM VCPUs and Devices until resume.
    SuspendVm,
    /// Resume VM VCPUs and Devices.
    ResumeVm,
    /// Returns Vcpus PID/TID
    VcpuPidTid,
    /// Throttles the requested vCPU for microseconds
    // NOTE(review): presumably the fields are `(vcpu index, microseconds)` — confirm.
    Throttle(usize, u32),
    /// Returns unique descriptor of this VM.
    GetVmDescriptor,
    /// Registers memory in guest.
    // NOTE(review): `range_start`/`range_end` presumably bound where the memory may be placed
    // in the guest; the handling code is not visible here — confirm.
    RegisterMemory {
        fd: SafeDescriptor,
        offset: u64,
        range_start: u64,
        range_end: u64,
        cache_coherent: bool,
    },
    /// Unregisters memory in guest.
    UnregisterMemory { region_id: u64 },
}
1652
/// Kinds of events a listener can register for.
///
/// NOTE: when making any changes to this enum please also update
/// RegisteredEventFfi in crosvm_control/src/lib.rs
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum RegisteredEvent {
    VirtioBalloonWsReport,
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1661
/// A `RegisteredEvent` together with the data that accompanies it.
///
/// The variants correspond one-to-one to those of `RegisteredEvent`.
#[derive(Serialize, Deserialize, Debug)]
pub enum RegisteredEventWithData {
    /// Working-set report from the balloon.
    VirtioBalloonWsReport {
        /// Buckets of the working-set report.
        ws_buckets: Vec<balloon_control::WSBucket>,
        // NOTE(review): presumably the balloon's current actual size — unit not visible here.
        balloon_actual: u64,
    },
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1671
1672impl RegisteredEventWithData {
1673    pub fn into_event(&self) -> RegisteredEvent {
1674        match self {
1675            Self::VirtioBalloonWsReport { .. } => RegisteredEvent::VirtioBalloonWsReport,
1676            Self::VirtioBalloonResize => RegisteredEvent::VirtioBalloonResize,
1677            Self::VirtioBalloonOOMDeflation => RegisteredEvent::VirtioBalloonOOMDeflation,
1678        }
1679    }
1680
1681    #[cfg(feature = "registered_events")]
1682    pub fn into_proto(&self) -> registered_events::RegisteredEvent {
1683        match self {
1684            Self::VirtioBalloonWsReport {
1685                ws_buckets,
1686                balloon_actual,
1687            } => {
1688                let mut report = registered_events::VirtioBalloonWsReport {
1689                    balloon_actual: *balloon_actual,
1690                    ..registered_events::VirtioBalloonWsReport::new()
1691                };
1692                for ws in ws_buckets {
1693                    report.ws_buckets.push(registered_events::VirtioWsBucket {
1694                        age: ws.age,
1695                        file_bytes: ws.bytes[0],
1696                        anon_bytes: ws.bytes[1],
1697                        ..registered_events::VirtioWsBucket::new()
1698                    });
1699                }
1700                let mut event = registered_events::RegisteredEvent::new();
1701                event.set_ws_report(report);
1702                event
1703            }
1704            Self::VirtioBalloonResize => {
1705                let mut event = registered_events::RegisteredEvent::new();
1706                event.set_resize(registered_events::VirtioBalloonResize::new());
1707                event
1708            }
1709            Self::VirtioBalloonOOMDeflation => {
1710                let mut event = registered_events::RegisteredEvent::new();
1711                event.set_oom_deflation(registered_events::VirtioBalloonOOMDeflation::new());
1712                event
1713            }
1714        }
1715    }
1716
1717    pub fn from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self {
1718        RegisteredEventWithData::VirtioBalloonWsReport {
1719            ws_buckets: ws.ws.clone(),
1720            balloon_actual,
1721        }
1722    }
1723}
1724
1725pub fn handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse {
1726    // Forward the request to the block device process via its control socket.
1727    if let Err(e) = disk_host_tube.send(command) {
1728        error!("disk socket send failed: {}", e);
1729        return VmResponse::Err(SysError::new(EINVAL));
1730    }
1731
1732    // Wait for the disk control command to be processed
1733    match disk_host_tube.recv() {
1734        Ok(DiskControlResult::Ok) => VmResponse::Ok,
1735        Ok(DiskControlResult::Err(e)) => VmResponse::Err(e),
1736        Err(e) => {
1737            error!("disk socket recv failed: {}", e);
1738            VmResponse::Err(SysError::new(EINVAL))
1739        }
1740    }
1741}
1742
1743/// WARNING: descriptor must be a mapping handle on Windows.
1744fn map_descriptor(
1745    descriptor: &dyn AsRawDescriptor,
1746    offset: u64,
1747    size: u64,
1748    prot: Protection,
1749) -> Result<Box<dyn MappedRegion>> {
1750    let size: usize = size.try_into().map_err(|_e| SysError::new(ERANGE))?;
1751    match MemoryMappingBuilder::new(size)
1752        .from_descriptor(descriptor)
1753        .offset(offset)
1754        .protection(prot)
1755        .build()
1756    {
1757        Ok(mmap) => Ok(Box::new(mmap)),
1758        Err(MmapError::SystemCallFailed(e)) => Err(e),
1759        _ => Err(SysError::new(EINVAL)),
1760    }
1761}
1762
1763// Get vCPU state. vCPUs are expected to all hold the same state.
1764// In this function, there may be a time where vCPUs are not holding the same state
1765// as they transition from one state to the other. This is expected, and the final result
1766// should be all vCPUs holding the same state.
1767fn get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode> {
1768    let (send_chan, recv_chan) = mpsc::channel();
1769    kick_vcpus(VcpuControl::GetStates(send_chan));
1770    if vcpu_num == 0 {
1771        bail!("vcpu_num is zero");
1772    }
1773    let mut current_mode_vec: Vec<VmRunMode> = Vec::new();
1774    for _ in 0..vcpu_num {
1775        match recv_chan.recv() {
1776            Ok(state) => current_mode_vec.push(state),
1777            Err(e) => {
1778                bail!("Failed to get vCPU state: {}", e);
1779            }
1780        };
1781    }
1782    let first_state = current_mode_vec[0];
1783    if first_state == VmRunMode::Exiting {
1784        panic!("Attempt to snapshot while exiting.");
1785    }
1786    if current_mode_vec.iter().any(|x| *x != first_state) {
1787        // We do not panic here. It could be that vCPUs are transitioning from one mode to another.
1788        bail!("Unknown VM state: vCPUs hold different states.");
1789    }
1790    Ok(first_state)
1791}
1792
/// A guard to guarantee that all the vCPUs are suspended during the scope.
///
/// When this guard is dropped, it rolls back the state of CPUs.
pub struct VcpuSuspendGuard<'a> {
    // Run mode the vCPUs reported when the guard was created; re-sent to the vCPUs on drop
    // unless it is `VmRunMode::Suspending`.
    saved_run_mode: VmRunMode,
    // Callback used to send a `VcpuControl` message to the vCPUs.
    kick_vcpus: &'a dyn Fn(VcpuControl),
}
1800
1801impl<'a> VcpuSuspendGuard<'a> {
1802    /// Check the all vCPU state and suspend the vCPUs if they are running.
1803    ///
1804    /// This returns [VcpuSuspendGuard] to rollback the vcpu state.
1805    ///
1806    /// # Arguments
1807    ///
1808    /// * `kick_vcpus` - A funtion to send [VcpuControl] message to all the vCPUs and interrupt
1809    ///   them.
1810    /// * `vcpu_num` - The number of vCPUs.
1811    pub fn new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self> {
1812        // get initial vcpu state
1813        let saved_run_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1814        match saved_run_mode {
1815            VmRunMode::Running => {
1816                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1817                // Blocking call, waiting for response to ensure vCPU state was updated.
1818                // In case of failure, where a vCPU still has the state running, start up vcpus and
1819                // abort operation.
1820                let current_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1821                if current_mode != VmRunMode::Suspending {
1822                    kick_vcpus(VcpuControl::RunState(saved_run_mode));
1823                    bail!("vCPUs failed to all suspend. Kicking back all vCPUs to their previous state: {saved_run_mode}");
1824                }
1825            }
1826            VmRunMode::Suspending => {
1827                // do nothing. keep the state suspending.
1828            }
1829            other => {
1830                bail!("vcpus are not in running/suspending state, but {}", other);
1831            }
1832        };
1833        Ok(Self {
1834            saved_run_mode,
1835            kick_vcpus,
1836        })
1837    }
1838}
1839
1840impl Drop for VcpuSuspendGuard<'_> {
1841    fn drop(&mut self) {
1842        if self.saved_run_mode != VmRunMode::Suspending {
1843            (self.kick_vcpus)(VcpuControl::RunState(self.saved_run_mode));
1844        }
1845    }
1846}
1847
/// A guard to guarantee that all devices are sleeping during its scope.
///
/// When this guard is dropped, it wakes the devices.
pub struct DeviceSleepGuard<'a> {
    // Tube over which `DeviceControlCommand`s are sent and their responses received.
    device_control_tube: &'a Tube,
    // State the devices reported when the guard was created; devices are only woken on drop if
    // this is `DevicesState::Wake`.
    devices_state: DevicesState,
}
1855
1856impl<'a> DeviceSleepGuard<'a> {
1857    fn new(device_control_tube: &'a Tube) -> anyhow::Result<Self> {
1858        device_control_tube
1859            .send(&DeviceControlCommand::GetDevicesState)
1860            .context("send command to devices control socket")?;
1861        let devices_state = match device_control_tube
1862            .recv()
1863            .context("receive from devices control socket")?
1864        {
1865            VmResponse::DevicesState(state) => state,
1866            resp => bail!("failed to get devices state. Unexpected behavior: {}", resp),
1867        };
1868        if let DevicesState::Wake = devices_state {
1869            device_control_tube
1870                .send(&DeviceControlCommand::SleepDevices)
1871                .context("send command to devices control socket")?;
1872            match device_control_tube
1873                .recv()
1874                .context("receive from devices control socket")?
1875            {
1876                VmResponse::Ok => (),
1877                resp => bail!("device sleep failed: {}", resp),
1878            }
1879        }
1880        Ok(Self {
1881            device_control_tube,
1882            devices_state,
1883        })
1884    }
1885}
1886
1887impl Drop for DeviceSleepGuard<'_> {
1888    fn drop(&mut self) {
1889        if let DevicesState::Wake = self.devices_state {
1890            if let Err(e) = self
1891                .device_control_tube
1892                .send(&DeviceControlCommand::WakeDevices)
1893            {
1894                panic!("failed to request device wake after snapshot: {e}");
1895            }
1896            match self.device_control_tube.recv() {
1897                Ok(VmResponse::Ok) => (),
1898                Ok(resp) => panic!("unexpected response to device wake request: {resp}"),
1899                Err(e) => panic!("failed to get reply for device wake request: {e}"),
1900            }
1901        }
1902    }
1903}
1904
impl VmRequest {
    /// Executes this request on the given Vm and other mutable state.
    ///
    /// This does not return a result, instead encapsulating the success or failure in a
    /// `VmResponse` with the intended purpose of sending the response back over the socket that
    /// received this `VmRequest`.
    ///
    /// `suspended_pvclock_state`: If the hypervisor has its own pvclock (not the same as
    /// virtio-pvclock) and the VM is suspended (not just the vCPUs, but the full VM), then
    /// `suspended_pvclock_state` will be used to store the ClockState saved just after the vCPUs
    /// were suspended. It is important that we save the value right after the vCPUs are suspended
    /// and restore it right before the vCPUs are resumed (instead of, more naturally, during the
    /// snapshot/restore steps) because the pvclock continues to tick even when the vCPUs are
    /// suspended.
    ///
    /// # Panics
    ///
    /// Panics when called with `VmRequest::Exit`, and hits `unreachable!` for `BalloonCommand`,
    /// `VcpuPidTid`, `Throttle`, `RegisterMemory` and `UnregisterMemory`; this function does not
    /// handle those requests.
    // Several parameters are only referenced by feature-gated match arms (e.g. `gpu`, `audio`,
    // `swap`), so they may be unused depending on the enabled features.
    #[allow(unused_variables)]
    pub fn execute(
        &self,
        vm: &impl Vm,
        disk_host_tubes: &[Tube],
        snd_host_tubes: &[Tube],
        pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
        gpu_control_tube: Option<&Tube>,
        usb_control_tube: Option<&Tube>,
        bat_control: &mut Option<BatControl>,
        kick_vcpus: impl Fn(VcpuControl),
        #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl),
        force_s2idle: bool,
        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
        device_control_tube: &Tube,
        vcpu_size: usize,
        irq_handler_control: &Tube,
        snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
        suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
    ) -> VmResponse {
        match self {
            VmRequest::Exit => {
                panic!("VmRequest::Exit should be handled by the platform run loop");
            }
            // The power-management requests below all need a `pm` resource and answer ENOTSUP
            // when none is available.
            VmRequest::Powerbtn => {
                if let Some(pm) = pm {
                    pm.lock().pwrbtn_evt();
                    VmResponse::Ok
                } else {
                    error!("{:#?} not supported", *self);
                    VmResponse::Err(SysError::new(ENOTSUP))
                }
            }
            VmRequest::Sleepbtn => {
                if let Some(pm) = pm {
                    pm.lock().slpbtn_evt();
                    VmResponse::Ok
                } else {
                    error!("{:#?} not supported", *self);
                    VmResponse::Err(SysError::new(ENOTSUP))
                }
            }
            VmRequest::Rtc { clear_evt } => {
                if let Some(pm) = pm.as_ref() {
                    // The request is only borrowed, so hand the PM resource its own clone of the
                    // event.
                    match clear_evt.try_clone() {
                        Ok(clear_evt) => {
                            // RTC event will asynchronously trigger wakeup.
                            pm.lock().rtc_evt(clear_evt);
                            VmResponse::Ok
                        }
                        Err(err) => {
                            error!("Error cloning clear_evt: {:?}", err);
                            VmResponse::Err(SysError::new(EIO))
                        }
                    }
                } else {
                    error!("{:#?} not supported", *self);
                    VmResponse::Err(SysError::new(ENOTSUP))
                }
            }
            VmRequest::SuspendVcpus => {
                // With `force_s2idle` set, the vCPUs are not kicked here and Ok is returned
                // directly.
                if !force_s2idle {
                    kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
                    // Confirm that every vCPU reports the suspending state.
                    let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
                        Ok(state) => state,
                        Err(e) => {
                            error!("failed to get vcpu state: {e}");
                            return VmResponse::Err(SysError::new(EIO));
                        }
                    };
                    if current_mode != VmRunMode::Suspending {
                        error!("vCPUs failed to all suspend.");
                        return VmResponse::Err(SysError::new(EIO));
                    }
                }
                VmResponse::Ok
            }
            VmRequest::ResumeVcpus => {
                // Query the devices state first: vCPUs are not resumed while devices are asleep.
                if let Err(e) = device_control_tube.send(&DeviceControlCommand::GetDevicesState) {
                    error!("failed to send GetDevicesState: {}", e);
                    return VmResponse::Err(SysError::new(EIO));
                }
                let devices_state = match device_control_tube.recv() {
                    Ok(VmResponse::DevicesState(state)) => state,
                    Ok(resp) => {
                        error!("failed to get devices state. Unexpected behavior: {}", resp);
                        return VmResponse::Err(SysError::new(EINVAL));
                    }
                    Err(e) => {
                        error!("failed to get devices state. Unexpected behavior: {}", e);
                        return VmResponse::Err(SysError::new(EINVAL));
                    }
                };
                if let DevicesState::Sleep = devices_state {
                    error!("Trying to wake Vcpus while Devices are asleep. Did you mean to use `crosvm resume --full`?");
                    return VmResponse::Err(SysError::new(EINVAL));
                }

                if force_s2idle {
                    // During resume also emulate powerbtn event which will allow to wakeup fully
                    // suspended guest.
                    if let Some(pm) = pm {
                        pm.lock().pwrbtn_evt();
                    } else {
                        error!("triggering power btn during resume not supported");
                        return VmResponse::Err(SysError::new(ENOTSUP));
                    }
                }

                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
                VmResponse::Ok
            }
            // Every swap arm below falls through to ENOTSUP when the `swap` feature is disabled
            // or no swap controller was provided.
            VmRequest::Swap(SwapCommand::Enable) => {
                #[cfg(feature = "swap")]
                if let Some(swap_controller) = swap_controller {
                    // Suspend all vcpus and devices while vmm-swap is enabling (move the guest
                    // memory contents to the staging memory) to guarantee no processes other than
                    // the swap monitor process access the guest memory.
                    let _vcpu_guard = match VcpuSuspendGuard::new(&kick_vcpus, vcpu_size) {
                        Ok(guard) => guard,
                        Err(e) => {
                            error!("failed to suspend vcpus: {:?}", e);
                            return VmResponse::Err(SysError::new(EINVAL));
                        }
                    };
                    // TODO(b/253386409): Use `devices::Suspendable::sleep()` instead of sending
                    // `SIGSTOP` signal.
                    let _devices_guard = match swap_controller.suspend_devices() {
                        Ok(guard) => guard,
                        Err(e) => {
                            error!("failed to suspend devices: {:?}", e);
                            return VmResponse::Err(SysError::new(EINVAL));
                        }
                    };

                    return match swap_controller.enable() {
                        Ok(()) => VmResponse::Ok,
                        Err(e) => {
                            error!("swap enable failed: {}", e);
                            VmResponse::Err(SysError::new(EINVAL))
                        }
                    };
                }
                VmResponse::Err(SysError::new(ENOTSUP))
            }
            VmRequest::Swap(SwapCommand::Trim) => {
                #[cfg(feature = "swap")]
                if let Some(swap_controller) = swap_controller {
                    return match swap_controller.trim() {
                        Ok(()) => VmResponse::Ok,
                        Err(e) => {
                            error!("swap trim failed: {}", e);
                            VmResponse::Err(SysError::new(EINVAL))
                        }
                    };
                }
                VmResponse::Err(SysError::new(ENOTSUP))
            }
            VmRequest::Swap(SwapCommand::SwapOut) => {
                #[cfg(feature = "swap")]
                if let Some(swap_controller) = swap_controller {
                    return match swap_controller.swap_out() {
                        Ok(()) => VmResponse::Ok,
                        Err(e) => {
                            error!("swap out failed: {}", e);
                            VmResponse::Err(SysError::new(EINVAL))
                        }
                    };
                }
                VmResponse::Err(SysError::new(ENOTSUP))
            }
            VmRequest::Swap(SwapCommand::Disable {
                #[cfg(feature = "swap")]
                slow_file_cleanup,
                ..
            }) => {
                #[cfg(feature = "swap")]
                if let Some(swap_controller) = swap_controller {
                    return match swap_controller.disable(*slow_file_cleanup) {
                        Ok(()) => VmResponse::Ok,
                        Err(e) => {
                            error!("swap disable failed: {}", e);
                            VmResponse::Err(SysError::new(EINVAL))
                        }
                    };
                }
                VmResponse::Err(SysError::new(ENOTSUP))
            }
            VmRequest::Swap(SwapCommand::Status) => {
                #[cfg(feature = "swap")]
                if let Some(swap_controller) = swap_controller {
                    return match swap_controller.status() {
                        Ok(status) => VmResponse::SwapStatus(status),
                        Err(e) => {
                            error!("swap status failed: {}", e);
                            VmResponse::Err(SysError::new(EINVAL))
                        }
                    };
                }
                VmResponse::Err(SysError::new(ENOTSUP))
            }
            // Full-VM suspend: stop the vCPUs, save the pvclock, then put the devices to sleep.
            VmRequest::SuspendVm => {
                info!("Starting crosvm suspend");
                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
                let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
                    Ok(state) => state,
                    Err(e) => {
                        error!("failed to get vcpu state: {e}");
                        return VmResponse::Err(SysError::new(EIO));
                    }
                };
                if current_mode != VmRunMode::Suspending {
                    error!("vCPUs failed to all suspend.");
                    return VmResponse::Err(SysError::new(EIO));
                }
                // Snapshot the pvclock ASAP after stopping vCPUs.
                if vm.check_capability(VmCap::PvClock) {
                    // An already-saved clock state is kept as is; it is only captured when none
                    // is stored yet.
                    if suspended_pvclock_state.is_none() {
                        *suspended_pvclock_state = Some(match vm.get_pvclock() {
                            Ok(x) => x,
                            Err(e) => {
                                error!("suspend_pvclock failed: {e:?}");
                                return VmResponse::Err(SysError::new(EIO));
                            }
                        });
                    }
                }
                if let Err(e) = device_control_tube
                    .send(&DeviceControlCommand::SleepDevices)
                    .context("send command to devices control socket")
                {
                    error!("{:?}", e);
                    return VmResponse::Err(SysError::new(EIO));
                };
                match device_control_tube
                    .recv()
                    .context("receive from devices control socket")
                {
                    Ok(VmResponse::Ok) => {
                        info!("Finished crosvm suspend successfully");
                        VmResponse::Ok
                    }
                    Ok(resp) => {
                        error!("device sleep failed: {}", resp);
                        VmResponse::Err(SysError::new(EIO))
                    }
                    Err(e) => {
                        error!("receive from devices control socket: {:?}", e);
                        VmResponse::Err(SysError::new(EIO))
                    }
                }
            }
            // Full-VM resume: wake the devices, restore the pvclock, then restart the vCPUs.
            VmRequest::ResumeVm => {
                info!("Starting crosvm resume");
                if let Err(e) = device_control_tube
                    .send(&DeviceControlCommand::WakeDevices)
                    .context("send command to devices control socket")
                {
                    error!("{:?}", e);
                    return VmResponse::Err(SysError::new(EIO));
                };
                match device_control_tube
                    .recv()
                    .context("receive from devices control socket")
                {
                    Ok(VmResponse::Ok) => {
                        info!("Finished crosvm resume successfully");
                    }
                    Ok(resp) => {
                        error!("device wake failed: {}", resp);
                        return VmResponse::Err(SysError::new(EIO));
                    }
                    Err(e) => {
                        error!("receive from devices control socket: {:?}", e);
                        return VmResponse::Err(SysError::new(EIO));
                    }
                }
                // Resume the pvclock as late as possible before starting vCPUs.
                if vm.check_capability(VmCap::PvClock) {
                    // If None, then we aren't suspended, which is a valid case.
                    // NOTE(review): the stored state is not cleared here; confirm whether it is
                    // reset elsewhere before a later suspend.
                    if let Some(x) = suspended_pvclock_state {
                        if let Err(e) = vm.set_pvclock(x) {
                            error!("resume_pvclock failed: {e:?}");
                            return VmResponse::Err(SysError::new(EIO));
                        }
                    }
                }
                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
                VmResponse::Ok
            }
            VmRequest::Gpe { gpe, clear_evt } => {
                if let Some(pm) = pm.as_ref() {
                    // `clear_evt` is optional; clone it only when present and surface a clone
                    // failure as an error.
                    match clear_evt.as_ref().map(|e| e.try_clone()).transpose() {
                        Ok(clear_evt) => {
                            pm.lock().gpe_evt(*gpe, clear_evt);
                            VmResponse::Ok
                        }
                        Err(err) => {
                            error!("Error cloning clear_evt: {:?}", err);
                            VmResponse::Err(SysError::new(EIO))
                        }
                    }
                } else {
                    error!("{:#?} not supported", *self);
                    VmResponse::Err(SysError::new(ENOTSUP))
                }
            }
            VmRequest::PciPme(requester_id) => {
                if let Some(pm) = pm.as_ref() {
                    pm.lock().pme_evt(*requester_id);
                    VmResponse::Ok
                } else {
                    error!("{:#?} not supported", *self);
                    VmResponse::Err(SysError::new(ENOTSUP))
                }
            }
            VmRequest::MakeRT => {
                kick_vcpus(VcpuControl::MakeRT);
                VmResponse::Ok
            }
            #[cfg(feature = "balloon")]
            VmRequest::BalloonCommand(_) => unreachable!("Should be handled with BalloonTube"),
            // `disk_index` selects the tube in `disk_host_tubes`; an out-of-range index yields
            // ENODEV.
            VmRequest::DiskCommand {
                disk_index,
                ref command,
            } => match &disk_host_tubes.get(*disk_index) {
                Some(tube) => handle_disk_command(command, tube),
                None => VmResponse::Err(SysError::new(ENODEV)),
            },
            #[cfg(feature = "gpu")]
            VmRequest::GpuCommand(ref cmd) => match gpu_control_tube {
                Some(gpu_control) => {
                    let res = gpu_control.send(cmd);
                    if let Err(e) = res {
                        error!("fail to send command to gpu control socket: {}", e);
                        return VmResponse::Err(SysError::new(EIO));
                    }
                    match gpu_control.recv() {
                        Ok(response) => VmResponse::GpuResponse(response),
                        Err(e) => {
                            error!("fail to recv command from gpu control socket: {}", e);
                            VmResponse::Err(SysError::new(EIO))
                        }
                    }
                }
                None => {
                    error!("gpu control is not enabled in crosvm");
                    VmResponse::Err(SysError::new(EIO))
                }
            },
            VmRequest::UsbCommand(ref cmd) => {
                let usb_control_tube = match usb_control_tube {
                    Some(t) => t,
                    None => {
                        error!("attempted to execute USB request without control tube");
                        return VmResponse::Err(SysError::new(ENODEV));
                    }
                };
                let res = usb_control_tube.send(cmd);
                if let Err(e) = res {
                    error!("fail to send command to usb control socket: {}", e);
                    return VmResponse::Err(SysError::new(EIO));
                }
                match usb_control_tube.recv() {
                    Ok(response) => VmResponse::UsbResponse(response),
                    Err(e) => {
                        error!("fail to recv command from usb control socket: {}", e);
                        VmResponse::Err(SysError::new(EIO))
                    }
                }
            }
            VmRequest::BatCommand(type_, ref cmd) => {
                match bat_control {
                    Some(battery) => {
                        // Reject commands addressed to a battery type other than the configured
                        // one.
                        if battery.type_ != *type_ {
                            error!("ignored battery command due to battery type: expected {:?}, got {:?}", battery.type_, type_);
                            return VmResponse::Err(SysError::new(EINVAL));
                        }

                        let res = battery.control_tube.send(cmd);
                        if let Err(e) = res {
                            error!("fail to send command to bat control socket: {}", e);
                            return VmResponse::Err(SysError::new(EIO));
                        }

                        match battery.control_tube.recv() {
                            Ok(response) => VmResponse::BatResponse(response),
                            Err(e) => {
                                error!("fail to recv command from bat control socket: {}", e);
                                VmResponse::Err(SysError::new(EIO))
                            }
                        }
                    }
                    None => VmResponse::BatResponse(BatControlResult::NoBatDevice),
                }
            }
            #[cfg(feature = "audio")]
            VmRequest::SndCommand(ref cmd) => match cmd {
                SndControlCommand::MuteAll(muted) => {
                    // Send the mute command to each sound tube in turn, stopping at the first
                    // failure.
                    for tube in snd_host_tubes {
                        let res = tube.send(&SndControlCommand::MuteAll(*muted));
                        if let Err(e) = res {
                            error!("fail to send command to snd control socket: {}", e);
                            return VmResponse::Err(SysError::new(EIO));
                        }

                        match tube.recv() {
                            Ok(VmResponse::Ok) => {
                                debug!("device is successfully muted");
                            }
                            Ok(resp) => {
                                error!("mute failed: {}", resp);
                                return VmResponse::ErrString("fail to mute the device".to_owned());
                            }
                            // NOTE(review): unlike the other arms, the receive error is not
                            // logged here.
                            Err(e) => return VmResponse::Err(SysError::new(EIO)),
                        }
                    }
                    VmResponse::Ok
                }
            },
            // No work is performed here for this request; Ok is returned unconditionally.
            VmRequest::HotPlugVfioCommand { device: _, add: _ } => VmResponse::Ok,
            #[cfg(feature = "pci-hotplug")]
            VmRequest::HotPlugNetCommand(ref _net_cmd) => {
                VmResponse::ErrString("hot plug not supported".to_owned())
            }
            VmRequest::Snapshot(SnapshotCommand::Take {
                ref snapshot_path,
                compress_memory,
                encrypt,
            }) => {
                info!("Starting crosvm snapshot");
                match do_snapshot(
                    snapshot_path.to_path_buf(),
                    kick_vcpus,
                    irq_handler_control,
                    device_control_tube,
                    vcpu_size,
                    snapshot_irqchip,
                    *compress_memory,
                    *encrypt,
                    suspended_pvclock_state,
                    vm,
                ) {
                    Ok(()) => {
                        info!("Finished crosvm snapshot successfully");
                        VmResponse::Ok
                    }
                    Err(e) => {
                        error!("failed to handle snapshot: {:?}", e);
                        VmResponse::Err(SysError::new(EIO))
                    }
                }
            }
            // The listener (un)registration requests below do no work here and simply return Ok.
            VmRequest::RegisterListener {
                socket_addr: _,
                event: _,
            } => VmResponse::Ok,
            VmRequest::UnregisterListener {
                socket_addr: _,
                event: _,
            } => VmResponse::Ok,
            VmRequest::Unregister { socket_addr: _ } => VmResponse::Ok,
            VmRequest::VcpuPidTid => unreachable!(),
            VmRequest::Throttle(_, _) => unreachable!(),
            VmRequest::GetVmDescriptor => {
                let vm_fd = match vm.try_clone_descriptor() {
                    Ok(vm_fd) => vm_fd,
                    Err(e) => {
                        error!("failed to get vm_fd: {:?}", e);
                        return VmResponse::Err(e);
                    }
                };
                VmResponse::VmDescriptor {
                    hypervisor: vm.hypervisor_kind(),
                    vm_fd,
                }
            }
            VmRequest::RegisterMemory { .. } => unreachable!(),
            VmRequest::UnregisterMemory { .. } => unreachable!(),
        }
    }
}
2401
2402/// Snapshot the VM to file at `snapshot_path`
2403fn do_snapshot(
2404    snapshot_path: PathBuf,
2405    kick_vcpus: impl Fn(VcpuControl),
2406    irq_handler_control: &Tube,
2407    device_control_tube: &Tube,
2408    vcpu_size: usize,
2409    snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
2410    compress_memory: bool,
2411    encrypt: bool,
2412    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2413    vm: &impl Vm,
2414) -> anyhow::Result<()> {
2415    let snapshot_start = Instant::now();
2416
2417    let _vcpu_guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size)?;
2418    let _device_guard = DeviceSleepGuard::new(device_control_tube)?;
2419
2420    // We want to flush all pending IRQs to the interrupt controller. There are two cases:
2421    //
2422    // MSIs: these are directly delivered to the interrupt controller.
2423    // We must verify the handler thread cycles once to deliver these interrupts.
2424    //
2425    // Legacy interrupts: in the case of a split IRQ chip, these interrupts may
2426    // flow through the userspace IOAPIC. If the hypervisor does not support
2427    // irqfds (e.g. WHPX), a single iteration will only flush the IRQ to the
2428    // IOAPIC. The underlying MSI will be asserted at this point, but if the
2429    // IRQ handler doesn't run another iteration, it won't be delivered to the
2430    // interrupt controller. This is why we cycle the handler thread twice (doing so
2431    // ensures we process the underlying MSI).
2432    //
2433    // We can handle both of these cases by iterating until there are no tokens
2434    // serviced on the requested iteration. Note that in the legacy case, this
2435    // ensures at least two iterations.
2436    //
2437    // Note: within CrosVM, *all* interrupts are eventually converted into the
2438    // same mechanicism that MSIs use. This is why we say "underlying" MSI for
2439    // a legacy IRQ.
2440    {
2441        let mut flush_attempts = 0;
2442        loop {
2443            irq_handler_control
2444                .send(&IrqHandlerRequest::WakeAndNotifyIteration)
2445                .context("failed to send flush command to IRQ handler thread")?;
2446            let resp = irq_handler_control
2447                .recv()
2448                .context("failed to recv flush response from IRQ handler thread")?;
2449            match resp {
2450                IrqHandlerResponse::HandlerIterationComplete(tokens_serviced) => {
2451                    if tokens_serviced == 0 {
2452                        break;
2453                    }
2454                }
2455                _ => bail!("received unexpected reply from IRQ handler: {:?}", resp),
2456            }
2457            flush_attempts += 1;
2458            if flush_attempts > EXPECTED_MAX_IRQ_FLUSH_ITERATIONS {
2459                warn!(
2460                    "flushing IRQs for snapshot may be stalled after iteration {}, expected <= {}
2461                      iterations",
2462                    flush_attempts, EXPECTED_MAX_IRQ_FLUSH_ITERATIONS
2463                );
2464            }
2465        }
2466        info!("flushed IRQs in {} iterations", flush_attempts);
2467    }
2468    let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
2469
2470    // Snapshot hypervisor's paravirtualized clock.
2471    snapshot_writer.write_fragment("pvclock", &AnySnapshot::to_any(suspended_pvclock_state)?)?;
2472
2473    // Snapshot Vcpus
2474    info!("VCPUs snapshotting...");
2475    let (send_chan, recv_chan) = mpsc::channel();
2476    kick_vcpus(VcpuControl::Snapshot(
2477        snapshot_writer.add_namespace("vcpu")?,
2478        send_chan,
2479    ));
2480    // Validate all Vcpus snapshot successfully
2481    for _ in 0..vcpu_size {
2482        recv_chan
2483            .recv()
2484            .context("Failed to recv Vcpu snapshot response")?
2485            .context("Failed to snapshot Vcpu")?;
2486    }
2487    info!("VCPUs snapshotted.");
2488
2489    // Snapshot irqchip
2490    info!("Snapshotting irqchip...");
2491    let irqchip_snap = snapshot_irqchip()?;
2492    snapshot_writer
2493        .write_fragment("irqchip", &irqchip_snap)
2494        .context("Failed to write irqchip state")?;
2495    info!("Snapshotted irqchip.");
2496
2497    // Snapshot memory
2498    {
2499        let mem_snap_start = Instant::now();
2500        // Use 64MB chunks when writing the memory snapshot (if encryption is used).
2501        const MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES: usize = 1024 * 1024 * 64;
2502        // SAFETY:
2503        // VM & devices are stopped.
2504        let guest_memory_metadata = unsafe {
2505            vm.get_memory()
2506                .snapshot(
2507                    &mut snapshot_writer.raw_fragment_with_chunk_size(
2508                        "mem",
2509                        MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES,
2510                    )?,
2511                    compress_memory,
2512                )
2513                .context("failed to snapshot memory")?
2514        };
2515        snapshot_writer.write_fragment("mem_metadata", &guest_memory_metadata)?;
2516
2517        let mem_snap_duration_ms = mem_snap_start.elapsed().as_millis();
2518        info!(
2519            "snapshot: memory snapshotted {}MB in {}ms",
2520            vm.get_memory().memory_size() / 1024 / 1024,
2521            mem_snap_duration_ms
2522        );
2523        metrics::log_metric_with_details(
2524            metrics::MetricEventType::SnapshotSaveMemoryLatency,
2525            mem_snap_duration_ms as i64,
2526            &metrics_events::RecordDetails {},
2527        );
2528    }
2529    // Snapshot devices
2530    info!("Devices snapshotting...");
2531    device_control_tube
2532        .send(&DeviceControlCommand::SnapshotDevices { snapshot_writer })
2533        .context("send command to devices control socket")?;
2534    let resp: VmResponse = device_control_tube
2535        .recv()
2536        .context("receive from devices control socket")?;
2537    if !matches!(resp, VmResponse::Ok) {
2538        bail!("unexpected SnapshotDevices response: {resp}");
2539    }
2540    info!("Devices snapshotted.");
2541
2542    let snap_duration_ms = snapshot_start.elapsed().as_millis();
2543    info!(
2544        "snapshot: completed snapshot in {}ms; VM mem size: {}MB",
2545        snap_duration_ms,
2546        vm.get_memory().memory_size() / 1024 / 1024,
2547    );
2548    metrics::log_metric_with_details(
2549        metrics::MetricEventType::SnapshotSaveOverallLatency,
2550        snap_duration_ms as i64,
2551        &metrics_events::RecordDetails {},
2552    );
2553    Ok(())
2554}
2555
2556/// Restore the VM to the snapshot at `restore_path`.
2557///
2558/// Same as `VmRequest::execute` with a `VmRequest::Restore`. Exposed as a separate function
2559/// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
2560pub fn do_restore(
2561    restore_path: &Path,
2562    kick_vcpus: impl Fn(VcpuControl),
2563    kick_vcpu: impl Fn(VcpuControl, usize),
2564    irq_handler_control: &Tube,
2565    device_control_tube: &Tube,
2566    vcpu_size: usize,
2567    mut restore_irqchip: impl FnMut(AnySnapshot) -> anyhow::Result<()>,
2568    require_encrypted: bool,
2569    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2570    vm: &impl Vm,
2571) -> anyhow::Result<()> {
2572    let restore_start = Instant::now();
2573    let _guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size);
2574    let _devices_guard = DeviceSleepGuard::new(device_control_tube)?;
2575
2576    let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
2577
2578    // Restore hypervisor's paravirtualized clock.
2579    *suspended_pvclock_state = snapshot_reader.read_fragment("pvclock")?;
2580
2581    // Restore IrqChip
2582    let irq_snapshot: AnySnapshot = snapshot_reader.read_fragment("irqchip")?;
2583    restore_irqchip(irq_snapshot)?;
2584
2585    // Restore Vcpu(s)
2586    let vcpu_snapshot_reader = snapshot_reader.namespace("vcpu")?;
2587    let vcpu_snapshot_count = vcpu_snapshot_reader.list_fragments()?.len();
2588    if vcpu_snapshot_count != vcpu_size {
2589        bail!(
2590            "bad cpu count in snapshot: expected={} got={}",
2591            vcpu_size,
2592            vcpu_snapshot_count,
2593        );
2594    }
2595    #[cfg(target_arch = "x86_64")]
2596    let host_tsc_reference_moment = {
2597        // SAFETY: rdtsc takes no arguments.
2598        unsafe { _rdtsc() }
2599    };
2600    let (send_chan, recv_chan) = mpsc::channel();
2601    for vcpu_id in 0..vcpu_size {
2602        kick_vcpu(
2603            VcpuControl::Restore(VcpuRestoreRequest {
2604                result_sender: send_chan.clone(),
2605                snapshot_reader: vcpu_snapshot_reader.clone(),
2606                #[cfg(target_arch = "x86_64")]
2607                host_tsc_reference_moment,
2608            }),
2609            vcpu_id,
2610        );
2611    }
2612    for _ in 0..vcpu_size {
2613        recv_chan
2614            .recv()
2615            .context("Failed to recv restore response")?
2616            .context("Failed to restore vcpu")?;
2617    }
2618
2619    // Restore Memory
2620    {
2621        let mem_restore_start = Instant::now();
2622        let guest_memory_metadata = snapshot_reader.read_fragment("mem_metadata")?;
2623        // SAFETY:
2624        // VM & devices are stopped.
2625        unsafe {
2626            vm.get_memory().restore(
2627                guest_memory_metadata,
2628                &mut snapshot_reader.raw_fragment("mem")?,
2629            )?
2630        };
2631        let mem_restore_duration_ms = mem_restore_start.elapsed().as_millis();
2632        info!(
2633            "snapshot: memory restored {}MB in {}ms",
2634            vm.get_memory().memory_size() / 1024 / 1024,
2635            mem_restore_duration_ms
2636        );
2637        metrics::log_metric_with_details(
2638            metrics::MetricEventType::SnapshotRestoreMemoryLatency,
2639            mem_restore_duration_ms as i64,
2640            &metrics_events::RecordDetails {},
2641        );
2642    }
2643    // Restore devices
2644    device_control_tube
2645        .send(&DeviceControlCommand::RestoreDevices {
2646            snapshot_reader: snapshot_reader.clone(),
2647        })
2648        .context("send restore devices command to devices control socket")?;
2649    let resp: VmResponse = device_control_tube
2650        .recv()
2651        .context("receive from devices control socket")?;
2652    if !matches!(resp, VmResponse::Ok) {
2653        bail!("unexpected RestoreDevices response: {resp}");
2654    }
2655
2656    // refresh the IRQ tokens.
2657    {
2658        irq_handler_control
2659            .send(&IrqHandlerRequest::RefreshIrqEventTokens)
2660            .context("failed to send refresh irq event token command to IRQ handler thread")?;
2661        let resp: IrqHandlerResponse = irq_handler_control
2662            .recv()
2663            .context("failed to recv refresh response from IRQ handler thread")?;
2664        if !matches!(resp, IrqHandlerResponse::IrqEventTokenRefreshComplete) {
2665            bail!(
2666                "received unexpected reply from IRQ handler thread: {:?}",
2667                resp
2668            );
2669        }
2670    }
2671
2672    let restore_duration_ms = restore_start.elapsed().as_millis();
2673    info!(
2674        "snapshot: completed restore in {}ms; mem size: {}",
2675        restore_duration_ms,
2676        vm.get_memory().memory_size(),
2677    );
2678
2679    metrics::log_metric_with_details(
2680        metrics::MetricEventType::SnapshotRestoreOverallLatency,
2681        restore_duration_ms as i64,
2682        &metrics_events::RecordDetails {},
2683    );
2684    Ok(())
2685}
2686
/// Alias that exposes `hypervisor::HypervisorKind` under this crate's namespace.
pub type HypervisorKind = hypervisor::HypervisorKind;
2688
/// Indication of success or failure of a `VmRequest`.
///
/// Success is usually indicated by `VmResponse::Ok` unless there is data associated with the
/// response.
#[derive(Serialize, Deserialize, Debug)]
#[must_use]
pub enum VmResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution, reported as a system
    /// error.
    Err(SysError),
    /// Indicates the request encountered some error during execution, reported as a free-form
    /// message.
    ErrString(String),
    /// The memory was registered into guest address space in memory slot number `slot`.
    RegisterMemory { slot: u32 },
    /// Variant of the register memory but with region_id.
    RegisterMemory2 { region_id: u64 },
    /// Results of balloon control commands.
    #[cfg(feature = "balloon")]
    BalloonStats {
        stats: balloon_control::BalloonStats,
        balloon_actual: u64,
    },
    /// Results of balloon WS-R command
    #[cfg(feature = "balloon")]
    BalloonWS {
        ws: balloon_control::BalloonWS,
        balloon_actual: u64,
    },
    /// Results of PCI hot plug
    #[cfg(feature = "pci-hotplug")]
    PciHotPlugResponse { bus: u8 },
    /// Results of usb control commands.
    UsbResponse(UsbControlResult),
    /// Results of gpu control commands.
    #[cfg(feature = "gpu")]
    GpuResponse(GpuControlResult),
    /// Results of battery control commands.
    BatResponse(BatControlResult),
    /// Results of swap status command.
    SwapStatus(SwapStatus),
    /// The state of devices (sleep/wake).
    DevicesState(DevicesState),
    /// Map of the Vcpu PID/TIDs.
    // NOTE(review): the key is presumably the vCPU index and the value a (pid, tid) pair —
    // confirm against the code that builds this map.
    VcpuPidTidResponse {
        pid_tid_map: BTreeMap<usize, (u32, u32)>,
    },
    /// Reply to `VmRequest::GetVmDescriptor`: a cloned descriptor of the VM together with the
    /// kind of hypervisor backing it.
    VmDescriptor {
        hypervisor: HypervisorKind,
        vm_fd: SafeDescriptor,
    },
}
2740
2741impl Display for VmResponse {
2742    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2743        use self::VmResponse::*;
2744
2745        match self {
2746            Ok => write!(f, "ok"),
2747            Err(e) => write!(f, "error: {e}"),
2748            ErrString(e) => write!(f, "error: {e}"),
2749            RegisterMemory { slot } => write!(f, "memory registered in slot {slot}"),
2750            RegisterMemory2 { region_id } => {
2751                write!(f, "memory registered in region id {region_id}")
2752            }
2753            #[cfg(feature = "balloon")]
2754            VmResponse::BalloonStats {
2755                stats,
2756                balloon_actual,
2757            } => {
2758                write!(
2759                    f,
2760                    "stats: {}\nballoon_actual: {}",
2761                    serde_json::to_string_pretty(&stats)
2762                        .unwrap_or_else(|_| "invalid_response".to_string()),
2763                    balloon_actual
2764                )
2765            }
2766            #[cfg(feature = "balloon")]
2767            VmResponse::BalloonWS { ws, balloon_actual } => {
2768                write!(
2769                    f,
2770                    "ws: {}, balloon_actual: {}",
2771                    serde_json::to_string_pretty(&ws)
2772                        .unwrap_or_else(|_| "invalid_response".to_string()),
2773                    balloon_actual,
2774                )
2775            }
2776            UsbResponse(result) => write!(f, "usb control request get result {result:?}"),
2777            #[cfg(feature = "pci-hotplug")]
2778            PciHotPlugResponse { bus } => write!(f, "pci hotplug bus {bus:?}"),
2779            #[cfg(feature = "gpu")]
2780            GpuResponse(result) => write!(f, "gpu control request result {result:?}"),
2781            BatResponse(result) => write!(f, "{result}"),
2782            SwapStatus(status) => {
2783                write!(
2784                    f,
2785                    "{}",
2786                    serde_json::to_string(&status)
2787                        .unwrap_or_else(|_| "invalid_response".to_string()),
2788                )
2789            }
2790            DevicesState(status) => write!(f, "devices status: {status:?}"),
2791            VcpuPidTidResponse { pid_tid_map } => write!(f, "vcpu pid tid map: {pid_tid_map:?}"),
2792            VmDescriptor { hypervisor, vm_fd } => {
2793                write!(f, "hypervisor: {hypervisor:?}, vm_fd: {vm_fd:?}")
2794            }
2795        }
2796    }
2797}
2798
/// Enum that allows remote control of a wait context (used between the Windows GpuDisplay & the
/// GPU worker).
#[derive(Serialize, Deserialize)]
pub enum ModifyWaitContext {
    /// Carries a descriptor, (de)serialized through `with_as_descriptor`.
    // NOTE(review): presumably asks the receiver to add this descriptor to its wait context —
    // confirm against the receiving side.
    Add(#[serde(with = "with_as_descriptor")] Descriptor),
}
2805
/// Errors reported for vfio-related virtio-iommu commands.
///
/// Variants are kept in sorted order (enforced by `#[sorted]`).
#[sorted]
#[derive(Error, Debug)]
pub enum VirtioIOMMUVfioError {
    /// The control socket failed.
    #[error("socket failed")]
    SocketFailed,
    /// A response of an unexpected kind was received; carries that response.
    #[error("unexpected response: {0}")]
    UnexpectedResponse(VirtioIOMMUResponse),
    /// The command name was not recognized; carries the offending text.
    #[error("unknown command: `{0}`")]
    UnknownCommand(String),
    /// A vfio control result reported as an error; displayed using the result's own text.
    #[error("{0}")]
    VfioControl(VirtioIOMMUVfioResult),
}
2818
/// Vfio-related commands that can be carried by a `VirtioIOMMURequest`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioCommand {
    /// Add the vfio device attached to virtio-iommu.
    VfioDeviceAdd {
        endpoint_addr: u32,
        wrapper_id: u32,
        // Transferred as a descriptor via `with_as_descriptor`.
        #[serde(with = "with_as_descriptor")]
        container: File,
    },
    /// Delete the vfio device attached to virtio-iommu.
    VfioDeviceDel {
        endpoint_addr: u32,
    },
    /// Map a dma-buf into vfio iommu table.
    VfioDmabufMap {
        region_id: VmMemoryRegionId,
        gpa: u64,
        size: u64,
        dma_buf: SafeDescriptor,
    },
    /// Unmap a dma-buf from vfio iommu table.
    VfioDmabufUnmap(VmMemoryRegionId),
}
2842
/// Outcome of a vfio-related virtio-iommu command.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioResult {
    /// The command completed successfully.
    Ok,
    /// The target is not in the pci ranges of virtio-iommu.
    NotInPCIRanges,
    /// No vfio container is available.
    NoAvailableContainer,
    /// No such vfio device.
    NoSuchDevice,
    /// No such mapped dmabuf.
    NoSuchMappedDmabuf,
    /// The command carried invalid parameters.
    InvalidParam,
}
2852
2853impl Display for VirtioIOMMUVfioResult {
2854    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2855        use self::VirtioIOMMUVfioResult::*;
2856
2857        match self {
2858            Ok => write!(f, "successfully"),
2859            NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
2860            NoAvailableContainer => write!(f, "no available vfio container"),
2861            NoSuchDevice => write!(f, "no such a vfio device"),
2862            NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
2863            InvalidParam => write!(f, "invalid parameters"),
2864        }
2865    }
2866}
2867
/// A request to the virtio-iommu process to perform some operations.
///
/// Unless otherwise noted, each request should expect a `VirtioIOMMUResponse::Ok` to be received on
/// success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMURequest {
    /// Command for vfio related operations; wraps the concrete `VirtioIOMMUVfioCommand`.
    VfioCommand(VirtioIOMMUVfioCommand),
}
2877
/// Indication of success or failure of a `VirtioIOMMURequest`.
///
/// Success is usually indicated by `VirtioIOMMUResponse::Ok` unless there is data associated
/// with the response.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution.
    Err(SysError),
    /// Results for Vfio commands.
    VfioResponse(VirtioIOMMUVfioResult),
}
2891
2892impl Display for VirtioIOMMUResponse {
2893    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2894        use self::VirtioIOMMUResponse::*;
2895        match self {
2896            Ok => write!(f, "ok"),
2897            Err(e) => write!(f, "error: {e}"),
2898            VfioResponse(result) => write!(
2899                f,
2900                "The vfio-related virtio-iommu request got result: {result:?}"
2901            ),
2902        }
2903    }
2904}
2905
2906/// Send VirtioIOMMURequest without waiting for the response
2907pub fn virtio_iommu_request_async(
2908    iommu_control_tube: &Tube,
2909    req: &VirtioIOMMURequest,
2910) -> VirtioIOMMUResponse {
2911    match iommu_control_tube.send(&req) {
2912        Ok(_) => VirtioIOMMUResponse::Ok,
2913        Err(e) => {
2914            error!("virtio-iommu socket send failed: {:?}", e);
2915            VirtioIOMMUResponse::Err(SysError::last())
2916        }
2917    }
2918}
2919
/// Result type returned by `virtio_iommu_request`.
///
/// The error type is `()`, so an `Err` carries no detail. `virtio_iommu_request` reports send and
/// receive failures through `VirtioIOMMUResponse::Err` wrapped in `Ok`.
pub type VirtioIOMMURequestResult = std::result::Result<VirtioIOMMUResponse, ()>;
2921
2922/// Send VirtioIOMMURequest and wait to get the response
2923pub fn virtio_iommu_request(
2924    iommu_control_tube: &Tube,
2925    req: &VirtioIOMMURequest,
2926) -> VirtioIOMMURequestResult {
2927    let response = match virtio_iommu_request_async(iommu_control_tube, req) {
2928        VirtioIOMMUResponse::Ok => match iommu_control_tube.recv() {
2929            Ok(response) => response,
2930            Err(e) => {
2931                error!("virtio-iommu socket recv failed: {:?}", e);
2932                VirtioIOMMUResponse::Err(SysError::last())
2933            }
2934        },
2935        resp => resp,
2936    };
2937    Ok(response)
2938}
2939
#[cfg(test)]
mod tests {
    use anyhow::anyhow;

    use super::*;

    /// Collects the messages of every error in the chain of `err`, in chain order.
    fn chain_messages(err: &VmMemoryResponseError) -> Vec<String> {
        err.0.chain().map(ToString::to_string).collect()
    }

    /// A round trip through JSON must preserve the top-level message and the whole chain.
    #[test]
    fn vm_memory_response_error_should_serialize_and_deserialize_correctly() {
        let layered = anyhow!("root cause")
            .context("context 1")
            .context("context 2");
        let before: VmMemoryResponseError = layered.into();

        let json_bytes =
            serde_json::to_vec(&before).expect("should serialize to json successfully");
        let after: VmMemoryResponseError = serde_json::from_slice(&json_bytes)
            .expect("should deserialize from json successfully");

        assert_eq!(before.0.to_string(), after.0.to_string());
        assert_eq!(chain_messages(&before), chain_messages(&after));
    }

    /// A flattened error with no messages at all must be rejected on deserialization.
    #[test]
    fn vm_memory_response_error_deserialization_should_handle_malformat_correctly() {
        let empty_flat = FlatVmMemoryResponseError(vec![]);
        let json_bytes =
            serde_json::to_vec(&empty_flat).expect("should serialize to json successfully");

        let outcome: StdResult<VmMemoryResponseError, _> = serde_json::from_slice(&json_bytes);
        outcome.expect_err("deserialize with 0 error messages should fail");
    }
}