vm_control/
lib.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Handles IPC for controlling the main VM process.
6//!
7//! The VM Control IPC protocol is synchronous, meaning that each `VmRequest` sent over a connection
8//! will receive a `VmResponse` for that request next time data is received over that connection.
9//!
10//! The wire message format is a little-endian C-struct of fixed size, along with a file descriptor
11//! if the request type expects one.
12
13pub mod api;
14
15mod device_id;
16pub use device_id::DeviceId;
17pub use device_id::PciId;
18pub use device_id::PlatformDeviceId;
19
20#[cfg(feature = "gdb")]
21pub mod gdb;
22pub mod gpu;
23
24use base::debug;
25#[cfg(any(target_os = "android", target_os = "linux"))]
26use base::linux::MemoryMappingBuilderUnix;
27#[cfg(any(target_os = "android", target_os = "linux"))]
28use base::sys::call_with_extended_max_files;
29#[cfg(any(target_os = "android", target_os = "linux"))]
30use base::MemoryMappingArena;
31#[cfg(windows)]
32use base::MemoryMappingBuilderWindows;
33use hypervisor::BalloonEvent;
34use hypervisor::MemCacheType;
35use hypervisor::MemRegion;
36use snapshot::AnySnapshot;
37
38#[cfg(feature = "balloon")]
39mod balloon_tube;
40pub mod client;
41pub mod sys;
42
43#[cfg(target_arch = "x86_64")]
44use std::arch::x86_64::_rdtsc;
45use std::collections::BTreeMap;
46use std::collections::BTreeSet;
47use std::collections::HashMap;
48use std::convert::TryInto;
49use std::fmt;
50use std::fmt::Display;
51use std::fs::File;
52use std::path::Path;
53use std::path::PathBuf;
54use std::result::Result as StdResult;
55use std::str::FromStr;
56use std::sync::mpsc;
57use std::sync::Arc;
58use std::time::Instant;
59
60use anyhow::bail;
61use anyhow::Context;
62use base::error;
63use base::info;
64use base::warn;
65use base::with_as_descriptor;
66use base::AsRawDescriptor;
67use base::Descriptor;
68use base::Error as SysError;
69use base::Event;
70use base::ExternalMapping;
71use base::IntoRawDescriptor;
72use base::MappedRegion;
73use base::MemoryMappingBuilder;
74use base::MmapError;
75use base::Protection;
76use base::Result;
77use base::SafeDescriptor;
78use base::SharedMemory;
79use base::Tube;
80use hypervisor::Datamatch;
81use hypervisor::IoEventAddress;
82use hypervisor::IrqRoute;
83use hypervisor::IrqSource;
84pub use hypervisor::MemSlot;
85use hypervisor::Vm;
86use hypervisor::VmCap;
87use libc::EINVAL;
88use libc::EIO;
89use libc::ENODEV;
90use libc::ENOTSUP;
91use libc::ERANGE;
92#[cfg(feature = "registered_events")]
93use protos::registered_events;
94use remain::sorted;
95use resources::Alloc;
96use resources::SystemAllocator;
97use rutabaga_gfx::RutabagaDescriptor;
98use rutabaga_gfx::RutabagaFromRawDescriptor;
99use rutabaga_gfx::RutabagaGralloc;
100use rutabaga_gfx::RutabagaMappedRegion;
101use rutabaga_gfx::RutabagaMesaHandle;
102use rutabaga_gfx::VulkanInfo;
103use serde::de::Error;
104use serde::Deserialize;
105use serde::Serialize;
106use snapshot::SnapshotReader;
107use snapshot::SnapshotWriter;
108use swap::SwapStatus;
109use sync::Mutex;
110#[cfg(any(target_os = "android", target_os = "linux"))]
111pub use sys::FsMappingRequest;
112#[cfg(windows)]
113pub use sys::InitialAudioSessionState;
114#[cfg(any(target_os = "android", target_os = "linux"))]
115pub use sys::VmMemoryMappingRequest;
116#[cfg(any(target_os = "android", target_os = "linux"))]
117pub use sys::VmMemoryMappingResponse;
118use thiserror::Error;
119pub use vm_control_product::GpuSendToMain;
120pub use vm_control_product::GpuSendToService;
121pub use vm_control_product::ServiceSendToGpu;
122use vm_memory::GuestAddress;
123
124#[cfg(feature = "balloon")]
125pub use crate::balloon_tube::BalloonControlCommand;
126#[cfg(feature = "balloon")]
127pub use crate::balloon_tube::BalloonTube;
128#[cfg(feature = "gdb")]
129pub use crate::gdb::VcpuDebug;
130#[cfg(feature = "gdb")]
131pub use crate::gdb::VcpuDebugStatus;
132#[cfg(feature = "gdb")]
133pub use crate::gdb::VcpuDebugStatusMessage;
134use crate::gpu::GpuControlCommand;
135use crate::gpu::GpuControlResult;
136
/// Control the state of a particular VM CPU.
///
/// Variants that need to report something back carry an `mpsc::Sender` for the reply.
#[derive(Clone, Debug)]
pub enum VcpuControl {
    /// Debugger request for the vCPU. Only compiled with the `gdb` feature.
    #[cfg(feature = "gdb")]
    Debug(VcpuDebug),
    /// Carries a `VmRunMode` for the vCPU.
    /// NOTE(review): presumably the run mode the vCPU should switch to — confirm at the handler.
    RunState(VmRunMode),
    /// NOTE(review): presumably asks the vCPU thread to become real-time — confirm at the handler.
    MakeRT,
    /// Request the current state of the vCPU. The result is sent back over the included channel.
    GetStates(mpsc::Sender<VmRunMode>),
    /// Request the vcpu write a snapshot of itself to the writer, then send a `Result` back over
    /// the channel after completion/failure.
    Snapshot(SnapshotWriter, mpsc::Sender<anyhow::Result<()>>),
    /// Restore the vCPU from a snapshot; see `VcpuRestoreRequest` for the payload.
    Restore(VcpuRestoreRequest),
    /// Only compiled on Android and Linux.
    /// NOTE(review): the meaning and unit of the `u32` are not visible here — confirm at the
    /// handler.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    Throttle(u32),
}
153
/// Request to restore a Vcpu from a given snapshot, and report the results
/// back via the provided channel.
#[derive(Clone, Debug)]
pub struct VcpuRestoreRequest {
    /// Channel on which the outcome of the restore is reported.
    pub result_sender: mpsc::Sender<anyhow::Result<()>>,
    /// Reader for the snapshot the vCPU is restored from.
    pub snapshot_reader: SnapshotReader,
    /// Only present on x86_64.
    /// NOTE(review): presumably a host TSC reading taken as the reference point for the
    /// restore — confirm where this field is populated.
    #[cfg(target_arch = "x86_64")]
    pub host_tsc_reference_moment: u64,
}
163
/// Mode of execution for the VM.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum VmRunMode {
    /// The default run mode indicating the VCPUs are running.
    #[default]
    Running,
    /// Indicates that the VCPUs are suspending execution until the `Running` mode is set.
    Suspending,
    /// Indicates that the VM is exiting all processes.
    Exiting,
    /// Indicates that the VM is in a breakpoint waiting for the debugger to do continue.
    Breakpoint,
}

impl Display for VmRunMode {
    /// Writes the lowercase name of the run mode (`running`, `suspending`, `exiting` or
    /// `breakpoint`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            Self::Running => "running",
            Self::Suspending => "suspending",
            Self::Exiting => "exiting",
            Self::Breakpoint => "breakpoint",
        };
        f.write_str(label)
    }
}
190
/// Trait for devices that get notification on specific GPE trigger.
pub trait GpeNotify: Send {
    /// Notification hook. The default implementation does nothing.
    fn notify(&mut self) {}
}
195
/// Trait for devices that get notification on specific PCI PME.
pub trait PmeNotify: Send {
    /// Notification hook, passed the requester id associated with the PME. The default
    /// implementation does nothing.
    fn notify(&mut self, _requester_id: u16) {}
}
200
/// Interface to a power-management resource.
///
/// Every method has a default body that does nothing, so implementors override only the events
/// they handle.
/// NOTE(review): the per-method descriptions below are inferred from the method names — confirm
/// against the implementors.
pub trait PmResource {
    /// Presumably signals a power-button event.
    fn pwrbtn_evt(&mut self) {}
    /// Presumably signals a sleep-button event.
    fn slpbtn_evt(&mut self) {}
    /// Presumably signals an RTC event; `_clear_evt` is an `Event` supplied by the caller.
    fn rtc_evt(&mut self, _clear_evt: Event) {}
    /// Presumably signals GPE number `_gpe`, with an optional `Event` supplied by the caller.
    fn gpe_evt(&mut self, _gpe: u32, _clear_evt: Option<Event>) {}
    /// Presumably signals a PCI PME from the given requester id.
    fn pme_evt(&mut self, _requester_id: u16) {}
    /// Registers a `GpeNotify` device for GPE number `_gpe`.
    fn register_gpe_notify_dev(&mut self, _gpe: u32, _notify_dev: Arc<Mutex<dyn GpeNotify>>) {}
    /// Registers a `PmeNotify` device for bus `_bus`.
    fn register_pme_notify_dev(&mut self, _bus: u8, _notify_dev: Arc<Mutex<dyn PmeNotify>>) {}
}
210
/// The maximum number of devices that can be listed in one `UsbControlCommand`.
///
/// It is also the fixed length of the `ports` array in `UsbControlCommand::ListDevice` and of the
/// device table in `UsbControlResult::Devices`.
///
/// This value was set to be equal to `xhci_regs::MAX_PORTS` for convenience, but it is not
/// necessary for correctness. Importing that value directly would be overkill because it would
/// require adding a big dependency for a single const.
pub const USB_CONTROL_MAX_PORTS: usize = 16;
217
/// Control commands for a disk.
#[derive(Serialize, Deserialize, Debug)]
pub enum DiskControlCommand {
    /// Resize a disk to `new_size` in bytes.
    Resize { new_size: u64 },
}
223
224impl Display for DiskControlCommand {
225    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
226        use self::DiskControlCommand::*;
227
228        match self {
229            Resize { new_size } => write!(f, "disk_resize {new_size}"),
230        }
231    }
232}
233
/// Result of a disk control operation.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum DiskControlResult {
    /// The operation succeeded.
    Ok,
    /// The operation failed with the contained system error.
    Err(SysError),
}
239
/// Net control commands for adding and removing tap devices.
///
/// Only compiled with the `pci-hotplug` feature.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug)]
pub enum NetControlCommand {
    /// Add a tap device.
    /// NOTE(review): the `String` is presumably the tap device name — confirm at the handler.
    AddTap(String),
    /// Remove a tap device.
    /// NOTE(review): the `u8` is presumably the bus number the device was added on — confirm at
    /// the handler.
    RemoveTap(u8),
}
247
/// Control commands for USB devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum UsbControlCommand {
    /// Attach the device behind `file`. The file is serialized as a descriptor.
    AttachDevice {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Attach the security key behind `file`. The file is serialized as a descriptor.
    AttachSecurityKey {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Detach the device at `port`.
    DetachDevice {
        port: u8,
    },
    /// List devices for the given ports; at most `USB_CONTROL_MAX_PORTS` ports fit in one
    /// command.
    /// NOTE(review): how unused array entries are encoded is not visible here — confirm.
    ListDevice {
        ports: [u8; USB_CONTROL_MAX_PORTS],
    },
}
265
/// One entry describing an attached USB device.
///
/// The derived `Default` yields `port == 0`, which `valid()` reports as not valid.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)]
pub struct UsbControlAttachedDevice {
    /// Port of the device; `0` marks an entry that is not valid.
    pub port: u8,
    /// Vendor id of the device.
    pub vendor_id: u16,
    /// Product id of the device.
    pub product_id: u16,
}
272
273impl UsbControlAttachedDevice {
274    pub fn valid(self) -> bool {
275        self.port != 0
276    }
277}
278
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
/// Result for hotplug and removal of PCI device.
///
/// Only compiled with the `pci-hotplug` feature.
pub enum PciControlResult {
    /// The device was added.
    /// NOTE(review): `bus` is presumably the bus the device was attached to — confirm.
    AddOk { bus: u8 },
    /// The operation failed; the string is the error text.
    ErrString(String),
    /// The device was removed.
    RemoveOk,
}
288
#[cfg(feature = "pci-hotplug")]
impl Display for PciControlResult {
    /// Writes `add_ok <bus>`, `error: <message>` or `remove_ok`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::RemoveOk => f.write_str("remove_ok"),
            Self::AddOk { bus } => write!(f, "add_ok {bus}"),
            Self::ErrString(message) => write!(f, "error: {message}"),
        }
    }
}
301
/// Result of a `UsbControlCommand`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum UsbControlResult {
    /// Success, with the port involved.
    Ok { port: u8 },
    /// No port was available.
    NoAvailablePort,
    /// The requested device does not exist.
    NoSuchDevice,
    /// The requested port does not exist.
    NoSuchPort,
    /// The device could not be opened.
    FailedToOpenDevice,
    /// Table of attached devices. Entries whose `valid()` is false are skipped when the result
    /// is displayed.
    Devices([UsbControlAttachedDevice; USB_CONTROL_MAX_PORTS]),
    /// The host device could not be initialized.
    FailedToInitHostDevice,
}
312
313impl Display for UsbControlResult {
314    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
315        use self::UsbControlResult::*;
316
317        match self {
318            UsbControlResult::Ok { port } => write!(f, "ok {port}"),
319            NoAvailablePort => write!(f, "no_available_port"),
320            NoSuchDevice => write!(f, "no_such_device"),
321            NoSuchPort => write!(f, "no_such_port"),
322            FailedToOpenDevice => write!(f, "failed_to_open_device"),
323            Devices(devices) => {
324                write!(f, "devices")?;
325                for d in devices.iter().filter(|d| d.valid()) {
326                    write!(f, " {} {:04x} {:04x}", d.port, d.vendor_id, d.product_id)?;
327                }
328                std::result::Result::Ok(())
329            }
330            FailedToInitHostDevice => write!(f, "failed_to_init_host_device"),
331        }
332    }
333}
334
/// Commands for snapshot feature
#[derive(Serialize, Deserialize, Debug)]
pub enum SnapshotCommand {
    /// Take a snapshot.
    Take {
        /// Path associated with the snapshot (presumably where it is written — confirm).
        snapshot_path: PathBuf,
        /// Presumably whether guest memory is compressed in the snapshot — confirm.
        compress_memory: bool,
        /// Presumably whether the snapshot is encrypted — confirm.
        encrypt: bool,
    },
}
344
/// Commands for actions on devices and the devices control thread.
///
/// NOTE(review): the per-variant descriptions are inferred from the variant names — confirm at
/// the handler.
#[derive(Serialize, Deserialize, Debug)]
pub enum DeviceControlCommand {
    /// Put the devices to sleep.
    SleepDevices,
    /// Wake the devices.
    WakeDevices,
    /// Snapshot the devices using the given writer.
    SnapshotDevices { snapshot_writer: SnapshotWriter },
    /// Restore the devices using the given reader.
    RestoreDevices { snapshot_reader: SnapshotReader },
    /// Query the state of the devices.
    GetDevicesState,
    /// Presumably stops the devices control thread.
    Exit,
}
355
/// Commands to control the IRQ handler thread.
#[derive(Serialize, Deserialize)]
pub enum IrqHandlerRequest {
    /// Hands additional IRQ control tubes to the handler.
    ///
    /// No response is sent for this command.
    AddIrqControlTubes(Vec<Tube>),
    /// Refreshes the set of event tokens (Events) from the Irqchip that the IRQ
    /// handler waits on to forward IRQs to their final destination (e.g. via
    /// Irqchip::service_irq_event).
    ///
    /// If the set of tokens exposed by the Irqchip changes while the VM is
    /// running (such as for snapshot restore), this command must be sent
    /// otherwise the VM will not receive IRQs as expected.
    RefreshIrqEventTokens,
    /// NOTE(review): presumably wakes the handler and asks it to report the iteration via
    /// `IrqHandlerResponse::HandlerIterationComplete`, whose doc refers to this request —
    /// confirm at the handler.
    WakeAndNotifyIteration,
    /// No response is sent for this command.
    Exit,
}
373
// NOTE(review): no use of this constant is visible in this part of the file. The name suggests
// an expected upper bound on IRQ handler flush iterations — confirm at the use site.
const EXPECTED_MAX_IRQ_FLUSH_ITERATIONS: usize = 100;
375
/// Response for [IrqHandlerRequest].
///
/// `AddIrqControlTubes` and `Exit` are documented as sending no response.
#[derive(Serialize, Deserialize, Debug)]
pub enum IrqHandlerResponse {
    /// Sent when the IRQ event tokens have been refreshed.
    IrqEventTokenRefreshComplete,
    /// Specifies the number of tokens serviced in the requested iteration
    /// (less the token for the `WakeAndNotifyIteration` request).
    HandlerIterationComplete(usize),
}
385
/// Source of a `VmMemoryRequest::RegisterMemory` mapping.
#[derive(Serialize, Deserialize)]
pub enum VmMemorySource {
    /// Register shared memory represented by the given descriptor.
    /// On Windows, descriptor MUST be a mapping handle.
    ///
    /// The whole region is mapped: offset 0, `SharedMemory::size()` bytes.
    SharedMemory(SharedMemory),
    /// Register a file mapping from the given descriptor.
    Descriptor {
        /// File descriptor to map.
        descriptor: SafeDescriptor,
        /// Offset within the file in bytes.
        offset: u64,
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register memory mapped by Vulkano.
    ///
    /// The fields are handed to `RutabagaGralloc::import_and_map` when the source is mapped.
    Vulkan {
        /// Handle to import; ownership moves into a `RutabagaDescriptor`.
        descriptor: SafeDescriptor,
        /// Handle type passed through as `RutabagaMesaHandle::handle_type`.
        handle_type: u32,
        /// Memory index passed through as `VulkanInfo::memory_idx`.
        memory_idx: u32,
        /// Device UUID, part of the `rutabaga_gfx::DeviceId`.
        device_uuid: [u8; 16],
        /// Driver UUID, part of the `rutabaga_gfx::DeviceId`.
        driver_uuid: [u8; 16],
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register the current rutabaga external mapping.
    ///
    /// `ptr` and `size` are used to build a `base::ExternalMapping`.
    /// NOTE(review): `ptr` is presumably a host address stored as an integer — confirm.
    ExternalMapping { ptr: u64, size: u64 },
}
413
414// The following are wrappers to avoid base dependencies in the rutabaga crate
415fn to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor {
416    // SAFETY:
417    // Safe because we own the SafeDescriptor at this point.
418    unsafe { RutabagaDescriptor::from_raw_descriptor(s.into_raw_descriptor()) }
419}
420
/// Wrapper that lets a boxed `RutabagaMappedRegion` be used where a `MappedRegion` is expected.
struct RutabagaMemoryRegion {
    /// The wrapped rutabaga mapping; pointer and size queries are forwarded to it.
    region: Box<dyn RutabagaMappedRegion>,
}
424
425impl RutabagaMemoryRegion {
426    pub fn new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion {
427        RutabagaMemoryRegion { region }
428    }
429}
430
// SAFETY:
//
// Self guarantees `ptr`..`ptr+size` is an mmaped region owned by this object that
// can't be unmapped during the `MappedRegion`'s lifetime.
unsafe impl MappedRegion for RutabagaMemoryRegion {
    /// Returns the start address reported by the wrapped rutabaga region.
    fn as_ptr(&self) -> *mut u8 {
        self.region.as_ptr()
    }

    /// Returns the size reported by the wrapped rutabaga region.
    fn size(&self) -> usize {
        self.region.size()
    }
}
444
445impl Display for VmMemorySource {
446    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
447        use self::VmMemorySource::*;
448
449        match self {
450            SharedMemory(..) => write!(f, "VmMemorySource::SharedMemory"),
451            Descriptor { .. } => write!(f, "VmMemorySource::Descriptor"),
452            Vulkan { .. } => write!(f, "VmMemorySource::Vulkan"),
453            ExternalMapping { .. } => write!(f, "VmMemorySource::ExternalMapping"),
454        }
455    }
456}
457
458impl VmMemorySource {
459    /// Map the resource and return its mapping and size in bytes.
460    fn map(
461        self,
462        gralloc: &mut RutabagaGralloc,
463        prot: Protection,
464    ) -> anyhow::Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
465        let (mem_region, size, descriptor) = match self {
466            VmMemorySource::Descriptor {
467                descriptor,
468                offset,
469                size,
470            } => (
471                map_descriptor(&descriptor, offset, size, prot)?,
472                size,
473                Some(descriptor),
474            ),
475
476            VmMemorySource::SharedMemory(shm) => {
477                (map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
478            }
479            VmMemorySource::Vulkan {
480                descriptor,
481                handle_type,
482                memory_idx,
483                device_uuid,
484                driver_uuid,
485                size,
486            } => {
487                let device_id = rutabaga_gfx::DeviceId {
488                    device_uuid,
489                    driver_uuid,
490                };
491                let mapped_region = gralloc
492                    .import_and_map(
493                        RutabagaMesaHandle {
494                            os_handle: to_rutabaga_desciptor(descriptor),
495                            handle_type,
496                        },
497                        VulkanInfo {
498                            memory_idx,
499                            device_id,
500                        },
501                        size,
502                    )
503                    .with_context(|| {
504                        format!(
505                            "gralloc failed to import and map, handle type: {handle_type}, memory index {memory_idx}, \
506                             size: {size}"
507                        )
508                    })?;
509                let mapped_region: Box<dyn MappedRegion> =
510                    Box::new(RutabagaMemoryRegion::new(mapped_region));
511                (mapped_region, size, None)
512            }
513            VmMemorySource::ExternalMapping { ptr, size } => {
514                let mapped_region: Box<dyn MappedRegion> = Box::new(ExternalMapping {
515                    ptr,
516                    size: size as usize,
517                });
518                (mapped_region, size, None)
519            }
520        };
521        Ok((mem_region, size, descriptor))
522    }
523}
524
/// Destination of a `VmMemoryRequest::RegisterMemory` mapping in guest address space.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryDestination {
    /// Map at an offset within an existing PCI BAR allocation.
    ///
    /// `allocation` identifies the BAR and `offset` is the byte offset inside it.
    ExistingAllocation { allocation: Alloc, offset: u64 },
    /// Map at the specified guest physical address.
    GuestPhysicalAddress(u64),
}
533
534impl VmMemoryDestination {
535    /// Allocate and return the guest address of a memory mapping destination.
536    pub fn allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress> {
537        let addr = match self {
538            VmMemoryDestination::ExistingAllocation { allocation, offset } => allocator
539                .mmio_allocator_any()
540                .address_from_pci_offset(allocation, offset, size)
541                .map_err(|_e| SysError::new(EINVAL))?,
542            VmMemoryDestination::GuestPhysicalAddress(gpa) => gpa,
543        };
544        Ok(GuestAddress(addr))
545    }
546}
547
/// Request to register or unregister an ioevent.
#[derive(Serialize, Deserialize)]
pub struct IoEventUpdateRequest {
    /// The event being registered or unregistered.
    pub event: Event,
    /// Address the ioevent is associated with (`VmMemoryRequest::IoEventRaw` describes it as a
    /// raw guest memory address).
    pub addr: u64,
    /// Data match condition for the ioevent.
    pub datamatch: Datamatch,
    /// Presumably `true` to register and `false` to unregister — confirm at the handler.
    pub register: bool,
}
556
/// Request to mmap a file to a shared memory.
/// This request is supposed to follow a `VmMemoryRequest::MmapAndRegisterMemory` request that
/// contains `SharedMemory` that `file` is mmaped to.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[derive(Serialize, Deserialize)]
pub struct VmMemoryFileMapping {
    /// File to map; serialized as a descriptor.
    #[serde(with = "with_as_descriptor")]
    pub file: File,
    /// Length of the mapping.
    pub length: usize,
    /// Offset within the shared memory mapping at which the file is mapped.
    pub mem_offset: usize,
    /// Offset within `file` at which the mapping starts.
    pub file_offset: u64,
}
569
/// Requests that operate on guest memory; handled by `VmMemoryRequest::execute`.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryRequest {
    /// Prepare a shared memory region to make later operations more efficient. This
    /// may be a no-op depending on underlying platform support.
    PrepareSharedMemoryRegion { alloc: Alloc, cache: MemCacheType },
    /// Register a memory to be mapped to the guest.
    RegisterMemory {
        /// Source of the memory to register (mapped file descriptor, shared memory region, etc.)
        source: VmMemorySource,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Protection for the mapping. `execute` compares it against `Protection::read()` when
        /// adding the memory region.
        prot: Protection,
        /// Cache attribute for guest memory setting
        cache: MemCacheType,
    },
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Call mmap to `shm` and register the memory region as a read-only guest memory.
    /// This request is followed by an array of `VmMemoryFileMapping` with length
    /// `num_file_mappings`
    MmapAndRegisterMemory {
        /// Shared memory that the files of the following `VmMemoryFileMapping`s are mapped
        /// into.
        shm: SharedMemory,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Length of the array of `VmMemoryFileMapping` that follows.
        num_file_mappings: usize,
    },
    /// Call hypervisor to free the given memory range.
    ///
    /// NOTE(review): each tuple is presumably (start address, length in bytes) — confirm.
    DynamicallyFreeMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Call hypervisor to reclaim a previously freed memory range.
    ///
    /// NOTE(review): each tuple is presumably (start address, length in bytes) — confirm.
    DynamicallyReclaimMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Balloon allocation/deallocation target reached.
    BalloonTargetReached { size: u64 },
    /// Unregister the given memory slot that was previously registered with `RegisterMemory`.
    UnregisterMemory(VmMemoryRegionId),
    /// Register an eventfd with raw guest memory address.
    IoEventRaw(IoEventUpdateRequest),
}
609
/// Struct for managing `VmMemoryRequest`s IOMMU related state.
pub struct VmMemoryRequestIommuClient {
    /// Tube used to send virtio-iommu requests; locked for the duration of each request.
    tube: Arc<Mutex<Tube>>,
    /// Ids of the regions for which a `VfioDmabufMap` request to the viommu succeeded.
    registered_memory: BTreeSet<VmMemoryRegionId>,
}
615
616impl VmMemoryRequestIommuClient {
617    /// Constructs `VmMemoryRequestIommuClient` from a tube for communication with the viommu.
618    pub fn new(tube: Arc<Mutex<Tube>>) -> Self {
619        Self {
620            tube,
621            registered_memory: BTreeSet::new(),
622        }
623    }
624}
625
/// Bookkeeping entry for a memory region registered through `VmMemoryRequest::RegisterMemory`.
enum RegisteredMemory {
    /// The source was mapped with `Vm::add_fd_mapping` into a slot that had been prepared
    /// beforehand (see `try_map_to_prepared_region`).
    FixedMapping {
        /// Slot of the prepared region.
        slot: MemSlot,
        /// Offset of the mapping within the prepared region.
        offset: usize,
        /// Size of the mapping.
        size: usize,
    },
    /// The source was added as its own memory region with `Vm::add_memory_region`.
    DynamicMapping {
        /// Slot returned by `add_memory_region`.
        slot: MemSlot,
    },
}
636
/// A prepared shared memory region, as recorded by
/// `VmMemoryRequest::PrepareSharedMemoryRegion`.
pub struct VmMappedMemoryRegion {
    /// Guest address of the start of the region; fixed mappings are placed at offsets from it.
    guest_address: GuestAddress,
    /// Memory slot of the region.
    slot: MemSlot,
}
641
/// State that `VmMemoryRequest::execute` carries across requests.
#[derive(Default)]
pub struct VmMemoryRegionState {
    /// Prepared regions, keyed by the allocation they were prepared for.
    mapped_regions: HashMap<Alloc, VmMappedMemoryRegion>,
    /// Registered memory, keyed by region id.
    registered_memory: BTreeMap<VmMemoryRegionId, RegisteredMemory>,
}
647
648fn try_map_to_prepared_region(
649    vm: &dyn Vm,
650    region_state: &mut VmMemoryRegionState,
651    source: &VmMemorySource,
652    dest: &VmMemoryDestination,
653    prot: &Protection,
654) -> Option<VmMemoryResponse> {
655    let VmMemoryDestination::ExistingAllocation {
656        allocation,
657        offset: dest_offset,
658    } = dest
659    else {
660        return None;
661    };
662
663    let VmMappedMemoryRegion {
664        guest_address,
665        slot,
666    } = region_state.mapped_regions.get(allocation)?;
667
668    let (descriptor, file_offset, size) = match source {
669        VmMemorySource::Descriptor {
670            descriptor,
671            offset,
672            size,
673        } => (
674            Descriptor(descriptor.as_raw_descriptor()),
675            *offset,
676            *size as usize,
677        ),
678        VmMemorySource::SharedMemory(shm) => {
679            let size = shm.size() as usize;
680            (Descriptor(shm.as_raw_descriptor()), 0, size)
681        }
682        _ => {
683            let error = anyhow::anyhow!(
684                "source {} is not compatible with fixed mapping into prepared memory region",
685                source
686            );
687            return Some(VmMemoryResponse::Err(error.into()));
688        }
689    };
690    if let Err(err) = vm
691        .add_fd_mapping(
692            *slot,
693            *dest_offset as usize,
694            size,
695            &descriptor,
696            file_offset,
697            *prot,
698        )
699        .context("failed to add fd mapping when trying to map to prepared region")
700    {
701        return Some(VmMemoryResponse::Err(err.into()));
702    }
703
704    let guest_address = GuestAddress(guest_address.0 + dest_offset);
705    let region_id = VmMemoryRegionId(guest_address);
706    region_state.registered_memory.insert(
707        region_id,
708        RegisteredMemory::FixedMapping {
709            slot: *slot,
710            offset: *dest_offset as usize,
711            size,
712        },
713    );
714
715    Some(VmMemoryResponse::RegisterMemory {
716        region_id,
717        slot: *slot,
718    })
719}
720
721impl VmMemoryRequest {
722    /// Executes this request on the given Vm.
723    ///
724    /// # Arguments
725    /// * `vm` - The `Vm` to perform the request on.
726    /// * `allocator` - Used to allocate addresses.
727    ///
728    /// This does not return a result, instead encapsulating the success or failure in a
729    /// `VmMemoryResponse` with the intended purpose of sending the response back over the socket
    /// that received this `VmMemoryRequest`.
731    pub fn execute(
732        self,
733        #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube,
734        vm: &dyn Vm,
735        sys_allocator: &mut SystemAllocator,
736        gralloc: &mut RutabagaGralloc,
737        iommu_client: Option<&mut VmMemoryRequestIommuClient>,
738        region_state: &mut VmMemoryRegionState,
739    ) -> VmMemoryResponse {
740        use self::VmMemoryRequest::*;
741        match self {
742            PrepareSharedMemoryRegion { alloc, cache } => {
743                // Currently the iommu_client is only used by virtio-gpu when used alongside GPU
744                // pci-passthrough.
745                //
746                // TODO(b/323368701): Make compatible with iommu_client by ensuring that
747                // VirtioIOMMUVfioCommand::VfioDmabufMap is submitted for both dynamic mappings and
748                // fixed mappings (i.e. whether or not try_map_to_prepared_region succeeds in
749                // RegisterMemory case below).
750                assert!(iommu_client.is_none());
751
752                if !sys::should_prepare_memory_region() {
753                    return VmMemoryResponse::Ok;
754                }
755
756                match sys::prepare_shared_memory_region(vm, sys_allocator, alloc, cache)
757                    .context("failed to prepare shared memory region")
758                {
759                    Ok(region) => {
760                        region_state.mapped_regions.insert(alloc, region);
761                        VmMemoryResponse::Ok
762                    }
763                    Err(e) => VmMemoryResponse::Err(e.into()),
764                }
765            }
766            RegisterMemory {
767                source,
768                dest,
769                prot,
770                cache,
771            } => {
772                if let Some(resp) =
773                    try_map_to_prepared_region(vm, region_state, &source, &dest, &prot)
774                {
775                    return resp;
776                }
777
778                // Correct on Windows because callers of this IPC guarantee descriptor is a mapping
779                // handle.
780                let (mapped_region, size, descriptor) =
781                    match source.map(gralloc, prot).context("gralloc mapping") {
782                        Ok((region, size, descriptor)) => (region, size, descriptor),
783                        Err(e) => return VmMemoryResponse::Err(e.into()),
784                    };
785
786                let guest_addr = match dest
787                    .allocate(sys_allocator, size)
788                    .context("VM memory destination allocation fails")
789                {
790                    Ok(addr) => addr,
791                    Err(e) => return VmMemoryResponse::Err(e.into()),
792                };
793
794                let slot = match vm
795                    .add_memory_region(
796                        guest_addr,
797                        mapped_region,
798                        prot == Protection::read(),
799                        false,
800                        cache,
801                    )
802                    .context("failed to add memory region when registering memory")
803                {
804                    Ok(slot) => slot,
805                    Err(e) => return VmMemoryResponse::Err(e.into()),
806                };
807
808                let region_id = VmMemoryRegionId(guest_addr);
809                if let (Some(descriptor), Some(iommu_client)) = (descriptor, iommu_client) {
810                    let request =
811                        VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
812                            region_id,
813                            gpa: guest_addr.0,
814                            size,
815                            dma_buf: descriptor,
816                        });
817
818                    match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
819                        Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
820                        resp => {
821                            let error = anyhow::anyhow!(
822                                "Unexpected virtio-iommu message response when registering memory: \
823                                 {:?}", resp);
824                            if let Err(e) = vm.remove_memory_region(slot) {
825                                // There is nothing we can do here, so we just log a warning
826                                // message.
827                                warn!("failed to remove memory region: {:?}", e);
828                            }
829                            return VmMemoryResponse::Err(error.into());
830                        }
831                    };
832
833                    iommu_client.registered_memory.insert(region_id);
834                }
835
836                region_state
837                    .registered_memory
838                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
839                VmMemoryResponse::RegisterMemory { region_id, slot }
840            }
841            #[cfg(any(target_os = "android", target_os = "linux"))]
842            MmapAndRegisterMemory {
843                shm,
844                dest,
845                num_file_mappings,
846            } => {
847                // Define a callback to be executed with extended limit of file counts.
                // It receives `num_file_mappings` FDs and calls `add_fd_mapping` for each.
849                let callback = || {
850                    let mem = match MemoryMappingBuilder::new(shm.size() as usize)
851                        .from_shared_memory(&shm)
852                        .build()
853                        .context("failed to build MemoryMapping from shared memory")
854                    {
855                        Ok(mem) => mem,
856                        Err(e) => return Err(VmMemoryResponse::Err(e.into())),
857                    };
858                    let mut mmap_arena = MemoryMappingArena::from(mem);
859
860                    // If `num_file_mappings` exceeds `SCM_MAX_FD`, `file_mappings` are sent in
861                    // chunks of length `SCM_MAX_FD`.
862                    let mut file_mappings = Vec::with_capacity(num_file_mappings);
863                    let mut read = 0;
864                    while read < num_file_mappings {
865                        let len = std::cmp::min(num_file_mappings - read, base::unix::SCM_MAX_FD);
866                        let mps: Vec<VmMemoryFileMapping> = match tube
867                            .recv_with_max_fds(len)
868                            .with_context(|| format!("get {num_file_mappings} FDs to be mapped"))
869                        {
870                            Ok(m) => m,
871                            Err(e) => return Err(VmMemoryResponse::Err(e.into())),
872                        };
873                        file_mappings.extend(mps.into_iter());
874                        read += len;
875                    }
876
877                    for VmMemoryFileMapping {
878                        mem_offset,
879                        length,
880                        file,
881                        file_offset,
882                    } in file_mappings
883                    {
884                        if let Err(e) = mmap_arena
885                            .add_fd_mapping(
886                                mem_offset,
887                                length,
888                                &file,
889                                file_offset,
890                                Protection::read(),
891                            )
892                            .context(
893                                "failed to add fd mapping when handling mmap and register memory",
894                            )
895                        {
896                            return Err(VmMemoryResponse::Err(e.into()));
897                        }
898                    }
899                    Ok(mmap_arena)
900                };
901                let mmap_arena = match call_with_extended_max_files(callback)
902                    .context("failed to set max count of file descriptors")
903                {
904                    Ok(Ok(m)) => m,
905                    Ok(Err(e)) => {
906                        return e;
907                    }
908                    Err(e) => {
909                        error!("{e:?}");
910                        return VmMemoryResponse::Err(e.into());
911                    }
912                };
913
914                let size = shm.size();
915                let guest_addr = match dest.allocate(sys_allocator, size).context(
916                    "VM memory destination allocation fails when handling mmap and register memory",
917                ) {
918                    Ok(addr) => addr,
919                    Err(e) => return VmMemoryResponse::Err(e.into()),
920                };
921
922                let slot = match vm
923                    .add_memory_region(
924                        guest_addr,
925                        Box::new(mmap_arena),
926                        true,
927                        false,
928                        MemCacheType::CacheCoherent,
929                    )
930                    .context("failed to add memory region when handling mmap and register memory")
931                {
932                    Ok(slot) => slot,
933                    Err(e) => return VmMemoryResponse::Err(e.into()),
934                };
935
936                let region_id = VmMemoryRegionId(guest_addr);
937
938                region_state
939                    .registered_memory
940                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
941
942                VmMemoryResponse::RegisterMemory { region_id, slot }
943            }
944            UnregisterMemory(id) => match region_state.registered_memory.remove(&id) {
945                Some(RegisteredMemory::DynamicMapping { slot }) => match vm
946                    .remove_memory_region(slot)
947                    .context(
948                        "failed to remove memory region when unregistering dynamic mapping memory",
949                    ) {
950                    Ok(_) => {
951                        if let Some(iommu_client) = iommu_client {
952                            if iommu_client.registered_memory.remove(&id) {
953                                let request = VirtioIOMMURequest::VfioCommand(
954                                    VirtioIOMMUVfioCommand::VfioDmabufUnmap(id),
955                                );
956
957                                match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
958                                    Ok(VirtioIOMMUResponse::VfioResponse(
959                                        VirtioIOMMUVfioResult::Ok,
960                                    )) => VmMemoryResponse::Ok,
961                                    resp => {
962                                        let error = anyhow::anyhow!(
963                                            "Unexpected virtio-iommu message response when \
964                                             unregistering memory: {:?}",
965                                            resp
966                                        );
967                                        VmMemoryResponse::Err(error.into())
968                                    }
969                                }
970                            } else {
971                                VmMemoryResponse::Ok
972                            }
973                        } else {
974                            VmMemoryResponse::Ok
975                        }
976                    }
977                    Err(e) => VmMemoryResponse::Err(e.into()),
978                },
979                Some(RegisteredMemory::FixedMapping { slot, offset, size }) => {
980                    match vm.remove_mapping(slot, offset, size).context(
981                        "failed to remove memory mapping when unregistering fixed mapping memory",
982                    ) {
983                        Ok(()) => VmMemoryResponse::Ok,
984                        Err(e) => VmMemoryResponse::Err(e.into()),
985                    }
986                }
987                None => {
988                    let error =
989                        anyhow::anyhow!("can't find the memory region when unregistering memory");
990                    VmMemoryResponse::Err(error.into())
991                }
992            },
993            DynamicallyFreeMemoryRanges { ranges } => {
994                let mut r = VmMemoryResponse::Ok;
995                for (guest_address, size) in ranges {
996                    match vm
997                        .handle_balloon_event(BalloonEvent::Inflate(MemRegion {
998                            guest_address,
999                            size,
1000                        }))
1001                        .context(
1002                            "failed to handle the inflate balloon event when freeing memory ranges \
1003                             dynamically",
1004                        ) {
1005                        Ok(_) => {}
1006                        Err(e) => {
1007                            error!("{:?}", e);
1008                            r = VmMemoryResponse::Err(e.into());
1009                            break;
1010                        }
1011                    }
1012                }
1013                r
1014            }
1015            DynamicallyReclaimMemoryRanges { ranges } => {
1016                let mut r = VmMemoryResponse::Ok;
1017                for (guest_address, size) in ranges {
1018                    match vm
1019                        .handle_balloon_event(BalloonEvent::Deflate(MemRegion {
1020                            guest_address,
1021                            size,
1022                        }))
1023                        .context(
1024                            "failed to handle the deflate balloon event when reclaiming memory \
1025                             ranges dynamically",
1026                        ) {
1027                        Ok(_) => {}
1028                        Err(e) => {
1029                            error!("{:?}", e);
1030                            r = VmMemoryResponse::Err(e.into());
1031                            break;
1032                        }
1033                    }
1034                }
1035                r
1036            }
1037            BalloonTargetReached { size } => {
1038                match vm
1039                    .handle_balloon_event(BalloonEvent::BalloonTargetReached(size))
1040                    .context("failed to handle the target reached balloon event")
1041                {
1042                    Ok(_) => VmMemoryResponse::Ok,
1043                    Err(e) => VmMemoryResponse::Err(e.into()),
1044                }
1045            }
1046            IoEventRaw(request) => {
1047                let res = if request.register {
1048                    vm.register_ioevent(
1049                        request.event,
1050                        IoEventAddress::Mmio(request.addr),
1051                        request.datamatch,
1052                    )
1053                    .context("failed to register IO event")
1054                } else {
1055                    vm.unregister_ioevent(
1056                        request.event,
1057                        IoEventAddress::Mmio(request.addr),
1058                        request.datamatch,
1059                    )
1060                    .context("failed to unregister IO event")
1061                };
1062                match res {
1063                    Ok(_) => VmMemoryResponse::Ok,
1064                    Err(e) => VmMemoryResponse::Err(e.into()),
1065                }
1066            }
1067        }
1068    }
1069}
1070
#[derive(Serialize, Deserialize, Debug, PartialOrd, PartialEq, Eq, Ord, Clone, Copy)]
/// Identifier for registered memory regions. Globally unique.
// The current implementation uses the guest physical address as the unique identifier.
pub struct VmMemoryRegionId(pub GuestAddress);
1075
/// Response produced when a VM memory request has been executed.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmMemoryResponse {
    /// The request to register memory into guest address space was successful.
    RegisterMemory {
        /// Identifier under which the region was recorded; pass it back to unregister.
        region_id: VmMemoryRegionId,
        /// Memory slot returned by the VM when the region was added.
        slot: u32,
    },
    /// The request succeeded and there is nothing further to report.
    Ok,
    /// The request failed; the payload carries the error together with its context chain.
    Err(VmMemoryResponseError),
}
1086
1087impl<T> From<Result<T>> for VmMemoryResponse {
1088    fn from(r: Result<T>) -> Self {
1089        match r {
1090            Ok(_) => VmMemoryResponse::Ok,
1091            Err(e) => VmMemoryResponse::Err(anyhow::Error::new(e).into()),
1092        }
1093    }
1094}
1095
/// Error payload of `VmMemoryResponse::Err`, wrapping an `anyhow::Error`.
///
/// It has hand-written serde impls that go through `FlatVmMemoryResponseError`, i.e. the error is
/// transmitted as the list of messages in its error chain.
#[derive(Debug, thiserror::Error)]
#[error("Vm memory response error: {0}")]
pub struct VmMemoryResponseError(#[from] pub anyhow::Error);
1099
1100impl TryFrom<FlatVmMemoryResponseError> for VmMemoryResponseError {
1101    type Error = anyhow::Error;
1102    fn try_from(value: FlatVmMemoryResponseError) -> StdResult<Self, Self::Error> {
1103        let inner = value
1104            .0
1105            .into_iter()
1106            .fold(
1107                None,
1108                |error: Option<anyhow::Error>, current_context| match error {
1109                    Some(error) => Some(error.context(current_context)),
1110                    None => Some(anyhow::Error::msg(current_context)),
1111                },
1112            )
1113            .context("should carry at least one error")?;
1114        Ok(Self(inner))
1115    }
1116}
1117
1118impl Serialize for VmMemoryResponseError {
1119    fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error>
1120    where
1121        S: serde::Serializer,
1122    {
1123        let flat: FlatVmMemoryResponseError = self.into();
1124        flat.serialize(serializer)
1125    }
1126}
1127
1128impl<'de> Deserialize<'de> for VmMemoryResponseError {
1129    fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
1130    where
1131        D: serde::Deserializer<'de>,
1132    {
1133        let flat = FlatVmMemoryResponseError::deserialize(deserializer)?;
1134        flat.try_into()
1135            .map_err(|e: anyhow::Error| D::Error::custom(e.to_string()))
1136    }
1137}
1138
/// Serializable form of `VmMemoryResponseError`: the messages of its error chain, ordered from
/// the innermost (root) error to the outermost context.
#[derive(Debug, Serialize, Deserialize)]
struct FlatVmMemoryResponseError(Vec<String>);
1141
1142impl From<&VmMemoryResponseError> for FlatVmMemoryResponseError {
1143    fn from(value: &VmMemoryResponseError) -> Self {
1144        let contexts = value
1145            .0
1146            .chain()
1147            .map(ToString::to_string)
1148            .rev()
1149            .collect::<Vec<_>>();
1150        Self(contexts)
1151    }
1152}
1153
/// Request to allocate, route, or release an interrupt for a device.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqRequest {
    /// Allocate one gsi, and associate gsi to irqfd with register_irqfd()
    AllocateOneMsi {
        irqfd: Event,
        device_id: DeviceId,
        queue_id: usize,
        device_name: String,
    },
    /// Allocate a specific gsi to irqfd with register_irqfd(). This must only
    /// be used when it is known that the gsi is free. Only the snapshot
    /// subsystem can make this guarantee, and use of this request by any other
    /// caller is strongly discouraged.
    AllocateOneMsiAtGsi {
        irqfd: Event,
        gsi: u32,
        device_id: DeviceId,
        queue_id: usize,
        device_name: String,
    },
    /// Add one msi route entry into the IRQ chip.
    AddMsiRoute {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
        #[cfg(target_arch = "aarch64")]
        pci_address: resources::PciAddress,
    },
    /// Unregister the irqfd and release the gsi.
    ReleaseOneIrq {
        gsi: u32,
        irqfd: Event,
    },
}
1188
/// Data to set up an IRQ event or IRQ route on the IRQ chip.
/// VmIrqRequest::execute can't take an `IrqChip` argument, because of a dependency cycle between
/// devices and vm_control, so it takes a Fn that processes an `IrqSetup`.
pub enum IrqSetup<'a> {
    /// Register an irqfd: (gsi, irqfd, device id, queue id, device name).
    Event(u32, &'a Event, DeviceId, usize, String),
    /// Add an MSI route entry.
    Route(IrqRoute),
    /// Unregister an irqfd: (gsi, irqfd).
    UnRegister(u32, &'a Event),
}
1197
1198impl VmIrqRequest {
1199    /// Executes this request on the given Vm.
1200    ///
1201    /// # Arguments
1202    /// * `set_up_irq` - A function that applies an `IrqSetup` to an IRQ chip.
1203    ///
1204    /// This does not return a result, instead encapsulating the success or failure in a
1205    /// `VmIrqResponse` with the intended purpose of sending the response back over the socket
1206    /// that received this `VmIrqResponse`.
1207    pub fn execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse
1208    where
1209        F: FnOnce(IrqSetup) -> Result<()>,
1210    {
1211        use self::VmIrqRequest::*;
1212        match *self {
1213            AllocateOneMsi {
1214                ref irqfd,
1215                device_id,
1216                queue_id,
1217                ref device_name,
1218            } => {
1219                if let Some(irq_num) = sys_allocator.allocate_irq() {
1220                    match set_up_irq(IrqSetup::Event(
1221                        irq_num,
1222                        irqfd,
1223                        device_id,
1224                        queue_id,
1225                        device_name.clone(),
1226                    )) {
1227                        Ok(_) => VmIrqResponse::AllocateOneMsi { gsi: irq_num },
1228                        Err(e) => VmIrqResponse::Err(e),
1229                    }
1230                } else {
1231                    VmIrqResponse::Err(SysError::new(EINVAL))
1232                }
1233            }
1234            AllocateOneMsiAtGsi {
1235                ref irqfd,
1236                gsi,
1237                device_id,
1238                queue_id,
1239                ref device_name,
1240            } => {
1241                match set_up_irq(IrqSetup::Event(
1242                    gsi,
1243                    irqfd,
1244                    device_id,
1245                    queue_id,
1246                    device_name.clone(),
1247                )) {
1248                    Ok(_) => VmIrqResponse::Ok,
1249                    Err(e) => VmIrqResponse::Err(e),
1250                }
1251            }
1252            AddMsiRoute {
1253                gsi,
1254                msi_address,
1255                msi_data,
1256                #[cfg(target_arch = "aarch64")]
1257                pci_address,
1258            } => {
1259                let route = IrqRoute {
1260                    gsi,
1261                    source: IrqSource::Msi {
1262                        address: msi_address,
1263                        data: msi_data,
1264                        #[cfg(target_arch = "aarch64")]
1265                        pci_address,
1266                    },
1267                };
1268                match set_up_irq(IrqSetup::Route(route)) {
1269                    Ok(_) => VmIrqResponse::Ok,
1270                    Err(e) => VmIrqResponse::Err(e),
1271                }
1272            }
1273            ReleaseOneIrq { gsi, ref irqfd } => {
1274                let _ = set_up_irq(IrqSetup::UnRegister(gsi, irqfd));
1275                sys_allocator.release_irq(gsi);
1276                VmIrqResponse::Ok
1277            }
1278        }
1279    }
1280}
1281
/// Response produced by `VmIrqRequest::execute`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqResponse {
    /// A gsi was allocated and set up; returned for `VmIrqRequest::AllocateOneMsi`.
    AllocateOneMsi { gsi: u32 },
    /// The request succeeded and there is nothing further to report.
    Ok,
    /// The request failed with the given system error.
    Err(SysError),
}
1288
/// Sleep/wake selector for devices.
// NOTE(review): the consumers of this enum are outside this part of the file; confirm the exact
// semantics of each state against them.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum DevicesState {
    Sleep,
    Wake,
}
1294
/// Outcome of a battery control operation; also the error type of the battery string parsers.
///
/// The `Display` impl provides the user-facing message for each variant.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum BatControlResult {
    /// The battery property was set.
    Ok,
    /// No battery device was created.
    NoBatDevice,
    /// The health string is not one of the supported values.
    NoSuchHealth,
    /// The property name is not one of the supported values.
    NoSuchProperty,
    /// The status string is not one of the supported values.
    NoSuchStatus,
    /// The battery type string is not one of the supported values.
    NoSuchBatType,
    /// The target value could not be parsed as an integer.
    StringParseIntErr,
    /// The target value could not be parsed as a boolean.
    StringParseBoolErr,
}
1306
1307impl Display for BatControlResult {
1308    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1309        use self::BatControlResult::*;
1310
1311        match self {
1312            Ok => write!(f, "Setting battery property successfully"),
1313            NoBatDevice => write!(f, "No battery device created"),
1314            NoSuchHealth => write!(f, "Invalid Battery health setting. Only support: unknown/good/overheat/dead/overvoltage/unexpectedfailure/cold/watchdogtimerexpire/safetytimerexpire/overcurrent"),
1315            NoSuchProperty => write!(f, "Battery doesn't have such property. Only support: status/health/present/capacity/aconline"),
1316            NoSuchStatus => write!(f, "Invalid Battery status setting. Only support: unknown/charging/discharging/notcharging/full"),
1317            NoSuchBatType => write!(f, "Invalid Battery type setting. Only support: goldfish"),
1318            StringParseIntErr => write!(f, "Battery property target ParseInt error"),
1319            StringParseBoolErr => write!(f, "Battery property target ParseBool error"),
1320        }
1321    }
1322}
1323
/// Kind of emulated battery device. `Goldfish` is the default and the only variant.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum BatteryType {
    #[default]
    Goldfish,
}
1330
1331impl FromStr for BatteryType {
1332    type Err = BatControlResult;
1333
1334    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1335        match s {
1336            "goldfish" => Ok(BatteryType::Goldfish),
1337            _ => Err(BatControlResult::NoSuchBatType),
1338        }
1339    }
1340}
1341
/// Battery property that can be addressed by name.
///
/// The `FromStr` and `Display` impls define the textual name of each variant.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatProperty {
    /// `"status"`
    Status,
    /// `"health"`
    Health,
    /// `"present"`
    Present,
    /// `"capacity"`
    Capacity,
    /// `"aconline"`
    ACOnline,
    /// `"set_fake_bat_config"`
    SetFakeBatConfig,
    /// `"cancel_fake_bat_config"`
    CancelFakeBatConfig,
}
1352
1353impl FromStr for BatProperty {
1354    type Err = BatControlResult;
1355
1356    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1357        match s {
1358            "status" => Ok(BatProperty::Status),
1359            "health" => Ok(BatProperty::Health),
1360            "present" => Ok(BatProperty::Present),
1361            "capacity" => Ok(BatProperty::Capacity),
1362            "aconline" => Ok(BatProperty::ACOnline),
1363            "set_fake_bat_config" => Ok(BatProperty::SetFakeBatConfig),
1364            "cancel_fake_bat_config" => Ok(BatProperty::CancelFakeBatConfig),
1365            _ => Err(BatControlResult::NoSuchProperty),
1366        }
1367    }
1368}
1369
1370impl Display for BatProperty {
1371    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1372        match *self {
1373            BatProperty::Status => write!(f, "status"),
1374            BatProperty::Health => write!(f, "health"),
1375            BatProperty::Present => write!(f, "present"),
1376            BatProperty::Capacity => write!(f, "capacity"),
1377            BatProperty::ACOnline => write!(f, "aconline"),
1378            BatProperty::SetFakeBatConfig => write!(f, "set_fake_bat_config"),
1379            BatProperty::CancelFakeBatConfig => write!(f, "cancel_fake_bat_config"),
1380        }
1381    }
1382}
1383
/// Battery charging status.
///
/// The `From<BatStatus> for u32` impl casts the variant with `as u32`, so the declaration order
/// determines the numeric value of each variant; do not reorder.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatStatus {
    Unknown,
    Charging,
    DisCharging,
    NotCharging,
    Full,
}
1392
1393impl BatStatus {
1394    pub fn new(status: String) -> std::result::Result<Self, BatControlResult> {
1395        match status.as_str() {
1396            "unknown" => Ok(BatStatus::Unknown),
1397            "charging" => Ok(BatStatus::Charging),
1398            "discharging" => Ok(BatStatus::DisCharging),
1399            "notcharging" => Ok(BatStatus::NotCharging),
1400            "full" => Ok(BatStatus::Full),
1401            _ => Err(BatControlResult::NoSuchStatus),
1402        }
1403    }
1404}
1405
1406impl FromStr for BatStatus {
1407    type Err = BatControlResult;
1408
1409    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1410        match s {
1411            "unknown" => Ok(BatStatus::Unknown),
1412            "charging" => Ok(BatStatus::Charging),
1413            "discharging" => Ok(BatStatus::DisCharging),
1414            "notcharging" => Ok(BatStatus::NotCharging),
1415            "full" => Ok(BatStatus::Full),
1416            _ => Err(BatControlResult::NoSuchStatus),
1417        }
1418    }
1419}
1420
1421impl From<BatStatus> for u32 {
1422    fn from(status: BatStatus) -> Self {
1423        status as u32
1424    }
1425}
1426
/// Battery health state.
///
/// The `From<BatHealth> for u32` impl casts the variant with `as u32`, so the declaration order
/// determines the numeric value of each variant; do not reorder.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatHealth {
    Unknown,
    Good,
    Overheat,
    Dead,
    OverVoltage,
    UnexpectedFailure,
    Cold,
    WatchdogTimerExpire,
    SafetyTimerExpire,
    OverCurrent,
}
1440
1441impl FromStr for BatHealth {
1442    type Err = BatControlResult;
1443
1444    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1445        match s {
1446            "unknown" => Ok(BatHealth::Unknown),
1447            "good" => Ok(BatHealth::Good),
1448            "overheat" => Ok(BatHealth::Overheat),
1449            "dead" => Ok(BatHealth::Dead),
1450            "overvoltage" => Ok(BatHealth::OverVoltage),
1451            "unexpectedfailure" => Ok(BatHealth::UnexpectedFailure),
1452            "cold" => Ok(BatHealth::Cold),
1453            "watchdogtimerexpire" => Ok(BatHealth::WatchdogTimerExpire),
1454            "safetytimerexpire" => Ok(BatHealth::SafetyTimerExpire),
1455            "overcurrent" => Ok(BatHealth::OverCurrent),
1456            _ => Err(BatControlResult::NoSuchHealth),
1457        }
1458    }
1459}
1460
1461impl From<BatHealth> for u32 {
1462    fn from(status: BatHealth) -> Self {
1463        status as u32
1464    }
1465}
1466
/// Command that changes one property of the battery device.
///
/// `BatControlCommand::new` builds a command from a textual property name and target value.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatControlCommand {
    /// Set the charging status.
    SetStatus(BatStatus),
    /// Set the health state.
    SetHealth(BatHealth),
    /// Set the "present" property to the given integer value.
    SetPresent(u32),
    /// Set the "capacity" property to the given integer value.
    SetCapacity(u32),
    /// Set the "aconline" property to the given integer value.
    SetACOnline(u32),
    /// Apply a fake battery configuration; the payload is the parsed integer target.
    // NOTE(review): the meaning of the integer is not visible in this part of the file.
    SetFakeBatConfig(u32),
    /// Cancel a previously applied fake battery configuration.
    CancelFakeConfig,
}
1477
1478impl BatControlCommand {
1479    pub fn new(property: String, target: String) -> std::result::Result<Self, BatControlResult> {
1480        let cmd = property.parse::<BatProperty>()?;
1481        match cmd {
1482            BatProperty::Status => Ok(BatControlCommand::SetStatus(target.parse::<BatStatus>()?)),
1483            BatProperty::Health => Ok(BatControlCommand::SetHealth(target.parse::<BatHealth>()?)),
1484            BatProperty::Present => Ok(BatControlCommand::SetPresent(
1485                target
1486                    .parse::<u32>()
1487                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1488            )),
1489            BatProperty::Capacity => Ok(BatControlCommand::SetCapacity(
1490                target
1491                    .parse::<u32>()
1492                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1493            )),
1494            BatProperty::ACOnline => Ok(BatControlCommand::SetACOnline(
1495                target
1496                    .parse::<u32>()
1497                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1498            )),
1499            BatProperty::SetFakeBatConfig => Ok(BatControlCommand::SetFakeBatConfig(
1500                target
1501                    .parse::<u32>()
1502                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1503            )),
1504            BatProperty::CancelFakeBatConfig => Ok(BatControlCommand::CancelFakeConfig),
1505        }
1506    }
1507}
1508
/// Used by the VM to control battery properties.
pub struct BatControl {
    /// Kind of battery device this control handle belongs to.
    pub type_: BatteryType,
    /// Tube carrying battery control messages.
    // NOTE(review): presumably connected to the battery device; confirm against the caller.
    pub control_tube: Tube,
}
1514
/// Used by the VM to control virtio-snd devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum SndControlCommand {
    /// Mute-all command carrying a boolean flag.
    // NOTE(review): presumably `true` mutes and `false` unmutes; confirm against the device.
    MuteAll(bool),
}
1520
/// Marks the device type of a hot-plugged PCI device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum HotPlugDeviceType {
    UpstreamPort,
    DownstreamPort,
    EndPoint,
}
1528
/// Describes a PCI device to hot-plug into (or remove from) the VM.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HotPlugDeviceInfo {
    /// Kind of PCI device being hot-plugged.
    pub device_type: HotPlugDeviceType,
    /// Path identifying the device.
    // NOTE(review): presumably the host sysfs path of the device; confirm against callers.
    pub path: PathBuf,
    /// Hot-plug interrupt flag.
    // NOTE(review): presumably whether a hot-plug interrupt should be raised; confirm.
    pub hp_interrupt: bool,
}
1536
/// Message for communicating a suspend or resume to the virtio-pvclock device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum PvClockCommand {
    /// Notify the device of a suspend.
    Suspend,
    /// Notify the device of a resume.
    Resume,
}
1543
/// Message used by virtio-pvclock to communicate command results.
#[derive(Serialize, Deserialize, Debug)]
pub enum PvClockCommandResponse {
    /// The command succeeded.
    Ok,
    /// Result that carries a tick count.
    // NOTE(review): presumably returned for `PvClockCommand::Resume`, with the total number of
    // ticks spent suspended; confirm against the device implementation.
    Resumed { total_suspended_ticks: u64 },
    /// The device is not active.
    DeviceInactive,
    /// The command failed with the given system error.
    Err(SysError),
}
1552
/// Commands for the vmm-swap feature, carried by `VmRequest::Swap`.
// NOTE(review): the handlers are outside this part of the file; see them for the exact semantics
// of each command and of `slow_file_cleanup`.
#[derive(Serialize, Deserialize, Debug)]
pub enum SwapCommand {
    Enable,
    Trim,
    SwapOut,
    Disable { slow_file_cleanup: bool },
    Status,
}
1562
/// A request to the main process to perform some operation on the VM.
///
/// Unless otherwise noted, each request should expect a `VmResponse::Ok` to be received on success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmRequest {
    /// Break the VM's run loop and exit.
    Exit,
    /// Trigger a power button event in the guest.
    Powerbtn,
    /// Trigger a sleep button event in the guest.
    Sleepbtn,
    /// Trigger a RTC interrupt in the guest. When the irq associated with the RTC is
    /// resampled, it will be re-asserted as long as `clear_evt` is not signaled.
    Rtc { clear_evt: Event },
    /// Suspend the VM's VCPUs until resume.
    SuspendVcpus,
    /// Swap the memory content into files on a disk.
    Swap(SwapCommand),
    /// Resume the VM's VCPUs that were previously suspended.
    ResumeVcpus,
    /// Inject a general-purpose event. If `clear_evt` is provided, when the irq associated
    /// with the GPE is resampled, it will be re-asserted as long as `clear_evt` is not
    /// signaled.
    Gpe { gpe: u32, clear_evt: Option<Event> },
    /// Inject a PCI PME.
    PciPme(u16),
    /// Make the VM's RT VCPU real-time.
    MakeRT,
    /// Command for balloon driver.
    #[cfg(feature = "balloon")]
    BalloonCommand(BalloonControlCommand),
    /// Send a command to a disk chosen by `disk_index`.
    /// `disk_index` is a 0-based count of `--disk`, `--rwdisk`, and `-r` command-line options.
    DiskCommand {
        disk_index: usize,
        command: DiskControlCommand,
    },
    /// Command to use controller.
    UsbCommand(UsbControlCommand),
    /// Command to modify the gpu.
    GpuCommand(GpuControlCommand),
    /// Command to set battery.
    BatCommand(BatteryType, BatControlCommand),
    /// Command to control snd devices.
    #[cfg(feature = "audio")]
    SndCommand(SndControlCommand),
    /// Command to add/remove multiple vfio-pci devices.
    HotPlugVfioCommand {
        device: HotPlugDeviceInfo,
        add: bool,
    },
    /// Command to add/remove network tap device as virtio-pci device.
    #[cfg(feature = "pci-hotplug")]
    HotPlugNetCommand(NetControlCommand),
    /// Command to snapshot devices.
    Snapshot(SnapshotCommand),
    /// Register for event notification.
    RegisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for notifications for event.
    UnregisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for all event notification.
    Unregister { socket_addr: String },
    /// Suspend VM VCPUs and Devices until resume.
    SuspendVm,
    /// Resume VM VCPUs and Devices.
    ResumeVm,
    /// Returns Vcpus PID/TID.
    VcpuPidTid,
    /// Throttles the requested vCPU for microseconds.
    // NOTE(review): presumably (vCPU index, duration in microseconds); confirm against the handler.
    Throttle(usize, u32),
    /// Returns unique descriptor of this VM.
    GetVmDescriptor,
    /// Registers memory in guest.
    RegisterMemory {
        fd: SafeDescriptor,
        offset: u64,
        range_start: u64,
        range_end: u64,
        cache_coherent: bool,
    },
    /// Unregisters memory in guest.
    UnregisterMemory { region_id: u64 },
}
1653
/// Kind of event a listener can register for (see `VmRequest::RegisterListener`).
///
/// NOTE: when making any changes to this enum please also update
/// RegisteredEventFfi in crosvm_control/src/lib.rs
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum RegisteredEvent {
    VirtioBalloonWsReport,
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1662
/// An event together with its payload.
///
/// Each variant corresponds to the `RegisteredEvent` variant of the same name; `into_event`
/// performs that mapping.
#[derive(Serialize, Deserialize, Debug)]
pub enum RegisteredEventWithData {
    /// Working-set report from the virtio balloon.
    VirtioBalloonWsReport {
        /// Working-set buckets of the report.
        ws_buckets: Vec<balloon_control::WSBucket>,
        /// Balloon "actual" value supplied alongside the report.
        // NOTE(review): presumably the current balloon size; units are not visible here.
        balloon_actual: u64,
    },
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1672
1673impl RegisteredEventWithData {
1674    pub fn into_event(&self) -> RegisteredEvent {
1675        match self {
1676            Self::VirtioBalloonWsReport { .. } => RegisteredEvent::VirtioBalloonWsReport,
1677            Self::VirtioBalloonResize => RegisteredEvent::VirtioBalloonResize,
1678            Self::VirtioBalloonOOMDeflation => RegisteredEvent::VirtioBalloonOOMDeflation,
1679        }
1680    }
1681
1682    #[cfg(feature = "registered_events")]
1683    pub fn into_proto(&self) -> registered_events::RegisteredEvent {
1684        match self {
1685            Self::VirtioBalloonWsReport {
1686                ws_buckets,
1687                balloon_actual,
1688            } => {
1689                let mut report = registered_events::VirtioBalloonWsReport {
1690                    balloon_actual: *balloon_actual,
1691                    ..registered_events::VirtioBalloonWsReport::new()
1692                };
1693                for ws in ws_buckets {
1694                    report.ws_buckets.push(registered_events::VirtioWsBucket {
1695                        age: ws.age,
1696                        file_bytes: ws.bytes[0],
1697                        anon_bytes: ws.bytes[1],
1698                        ..registered_events::VirtioWsBucket::new()
1699                    });
1700                }
1701                let mut event = registered_events::RegisteredEvent::new();
1702                event.set_ws_report(report);
1703                event
1704            }
1705            Self::VirtioBalloonResize => {
1706                let mut event = registered_events::RegisteredEvent::new();
1707                event.set_resize(registered_events::VirtioBalloonResize::new());
1708                event
1709            }
1710            Self::VirtioBalloonOOMDeflation => {
1711                let mut event = registered_events::RegisteredEvent::new();
1712                event.set_oom_deflation(registered_events::VirtioBalloonOOMDeflation::new());
1713                event
1714            }
1715        }
1716    }
1717
1718    pub fn from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self {
1719        RegisteredEventWithData::VirtioBalloonWsReport {
1720            ws_buckets: ws.ws.clone(),
1721            balloon_actual,
1722        }
1723    }
1724}
1725
1726pub fn handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse {
1727    // Forward the request to the block device process via its control socket.
1728    if let Err(e) = disk_host_tube.send(command) {
1729        error!("disk socket send failed: {}", e);
1730        return VmResponse::Err(SysError::new(EINVAL));
1731    }
1732
1733    // Wait for the disk control command to be processed
1734    match disk_host_tube.recv() {
1735        Ok(DiskControlResult::Ok) => VmResponse::Ok,
1736        Ok(DiskControlResult::Err(e)) => VmResponse::Err(e),
1737        Err(e) => {
1738            error!("disk socket recv failed: {}", e);
1739            VmResponse::Err(SysError::new(EINVAL))
1740        }
1741    }
1742}
1743
1744/// WARNING: descriptor must be a mapping handle on Windows.
1745fn map_descriptor(
1746    descriptor: &dyn AsRawDescriptor,
1747    offset: u64,
1748    size: u64,
1749    prot: Protection,
1750) -> Result<Box<dyn MappedRegion>> {
1751    let size: usize = size.try_into().map_err(|_e| SysError::new(ERANGE))?;
1752    match MemoryMappingBuilder::new(size)
1753        .from_descriptor(descriptor)
1754        .offset(offset)
1755        .protection(prot)
1756        .build()
1757    {
1758        Ok(mmap) => Ok(Box::new(mmap)),
1759        Err(MmapError::SystemCallFailed(e)) => Err(e),
1760        _ => Err(SysError::new(EINVAL)),
1761    }
1762}
1763
1764// Get vCPU state. vCPUs are expected to all hold the same state.
1765// In this function, there may be a time where vCPUs are not holding the same state
1766// as they transition from one state to the other. This is expected, and the final result
1767// should be all vCPUs holding the same state.
1768fn get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode> {
1769    let (send_chan, recv_chan) = mpsc::channel();
1770    kick_vcpus(VcpuControl::GetStates(send_chan));
1771    if vcpu_num == 0 {
1772        bail!("vcpu_num is zero");
1773    }
1774    let mut current_mode_vec: Vec<VmRunMode> = Vec::new();
1775    for _ in 0..vcpu_num {
1776        match recv_chan.recv() {
1777            Ok(state) => current_mode_vec.push(state),
1778            Err(e) => {
1779                bail!("Failed to get vCPU state: {}", e);
1780            }
1781        };
1782    }
1783    let first_state = current_mode_vec[0];
1784    if first_state == VmRunMode::Exiting {
1785        panic!("Attempt to snapshot while exiting.");
1786    }
1787    if current_mode_vec.iter().any(|x| *x != first_state) {
1788        // We do not panic here. It could be that vCPUs are transitioning from one mode to another.
1789        bail!("Unknown VM state: vCPUs hold different states.");
1790    }
1791    Ok(first_state)
1792}
1793
/// A guard to guarantee that all the vCPUs are suspended during the scope.
///
/// When this guard is dropped, it rolls back the state of CPUs.
pub struct VcpuSuspendGuard<'a> {
    // Run mode the vCPUs reported when the guard was created; re-applied on drop unless it was
    // already `VmRunMode::Suspending`.
    saved_run_mode: VmRunMode,
    // Callback used to deliver a `VcpuControl` message to the vCPUs.
    kick_vcpus: &'a dyn Fn(VcpuControl),
}
1801
1802impl<'a> VcpuSuspendGuard<'a> {
1803    /// Check the all vCPU state and suspend the vCPUs if they are running.
1804    ///
1805    /// This returns [VcpuSuspendGuard] to rollback the vcpu state.
1806    ///
1807    /// # Arguments
1808    ///
1809    /// * `kick_vcpus` - A funtion to send [VcpuControl] message to all the vCPUs and interrupt
1810    ///   them.
1811    /// * `vcpu_num` - The number of vCPUs.
1812    pub fn new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self> {
1813        // get initial vcpu state
1814        let saved_run_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1815        match saved_run_mode {
1816            VmRunMode::Running => {
1817                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1818                // Blocking call, waiting for response to ensure vCPU state was updated.
1819                // In case of failure, where a vCPU still has the state running, start up vcpus and
1820                // abort operation.
1821                let current_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1822                if current_mode != VmRunMode::Suspending {
1823                    kick_vcpus(VcpuControl::RunState(saved_run_mode));
1824                    bail!("vCPUs failed to all suspend. Kicking back all vCPUs to their previous state: {saved_run_mode}");
1825                }
1826            }
1827            VmRunMode::Suspending => {
1828                // do nothing. keep the state suspending.
1829            }
1830            other => {
1831                bail!("vcpus are not in running/suspending state, but {}", other);
1832            }
1833        };
1834        Ok(Self {
1835            saved_run_mode,
1836            kick_vcpus,
1837        })
1838    }
1839}
1840
1841impl Drop for VcpuSuspendGuard<'_> {
1842    fn drop(&mut self) {
1843        if self.saved_run_mode != VmRunMode::Suspending {
1844            (self.kick_vcpus)(VcpuControl::RunState(self.saved_run_mode));
1845        }
1846    }
1847}
1848
/// A guard to guarantee that all devices are sleeping during its scope.
///
/// When this guard is dropped, it wakes the devices.
pub struct DeviceSleepGuard<'a> {
    // Tube used to send `DeviceControlCommand`s and read back the `VmResponse` replies.
    device_control_tube: &'a Tube,
    // Device state observed when the guard was created; devices are only woken on drop if this
    // was `DevicesState::Wake`.
    devices_state: DevicesState,
}
1856
1857impl<'a> DeviceSleepGuard<'a> {
1858    fn new(device_control_tube: &'a Tube) -> anyhow::Result<Self> {
1859        device_control_tube
1860            .send(&DeviceControlCommand::GetDevicesState)
1861            .context("send command to devices control socket")?;
1862        let devices_state = match device_control_tube
1863            .recv()
1864            .context("receive from devices control socket")?
1865        {
1866            VmResponse::DevicesState(state) => state,
1867            resp => bail!("failed to get devices state. Unexpected behavior: {}", resp),
1868        };
1869        if let DevicesState::Wake = devices_state {
1870            device_control_tube
1871                .send(&DeviceControlCommand::SleepDevices)
1872                .context("send command to devices control socket")?;
1873            match device_control_tube
1874                .recv()
1875                .context("receive from devices control socket")?
1876            {
1877                VmResponse::Ok => (),
1878                resp => bail!("device sleep failed: {}", resp),
1879            }
1880        }
1881        Ok(Self {
1882            device_control_tube,
1883            devices_state,
1884        })
1885    }
1886}
1887
1888impl Drop for DeviceSleepGuard<'_> {
1889    fn drop(&mut self) {
1890        if let DevicesState::Wake = self.devices_state {
1891            if let Err(e) = self
1892                .device_control_tube
1893                .send(&DeviceControlCommand::WakeDevices)
1894            {
1895                panic!("failed to request device wake after snapshot: {e}");
1896            }
1897            match self.device_control_tube.recv() {
1898                Ok(VmResponse::Ok) => (),
1899                Ok(resp) => panic!("unexpected response to device wake request: {resp}"),
1900                Err(e) => panic!("failed to get reply for device wake request: {e}"),
1901            }
1902        }
1903    }
1904}
1905
1906impl VmRequest {
1907    /// Executes this request on the given Vm and other mutable state.
1908    ///
1909    /// This does not return a result, instead encapsulating the success or failure in a
1910    /// `VmResponse` with the intended purpose of sending the response back over the  socket that
1911    /// received this `VmRequest`.
1912    ///
1913    /// `suspended_pvclock_state`: If the hypervisor has its own pvclock (not the same as
1914    /// virtio-pvclock) and the VM is suspended (not just the vCPUs, but the full VM), then
1915    /// `suspended_pvclock_state` will be used to store the ClockState saved just after the vCPUs
1916    /// were suspended. It is important that we save the value right after the vCPUs are suspended
1917    /// and restore it right before the vCPUs are resumed (instead of, more naturally, during the
1918    /// snapshot/restore steps) because the pvclock continues to tick even when the vCPUs are
1919    /// suspended.
1920    #[allow(unused_variables)]
1921    pub fn execute(
1922        &self,
1923        vm: &dyn Vm,
1924        disk_host_tubes: &[Tube],
1925        snd_host_tubes: &[Tube],
1926        pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
1927        gpu_control_tube: Option<&Tube>,
1928        usb_control_tube: Option<&Tube>,
1929        bat_control: &mut Option<BatControl>,
1930        kick_vcpus: impl Fn(VcpuControl),
1931        #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl),
1932        force_s2idle: bool,
1933        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
1934        device_control_tube: &Tube,
1935        vcpu_size: usize,
1936        irq_handler_control: &Tube,
1937        snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
1938        suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
1939    ) -> VmResponse {
1940        match self {
1941            VmRequest::Exit => {
1942                panic!("VmRequest::Exit should be handled by the platform run loop");
1943            }
1944            VmRequest::Powerbtn => {
1945                if let Some(pm) = pm {
1946                    pm.lock().pwrbtn_evt();
1947                    VmResponse::Ok
1948                } else {
1949                    error!("{:#?} not supported", *self);
1950                    VmResponse::Err(SysError::new(ENOTSUP))
1951                }
1952            }
1953            VmRequest::Sleepbtn => {
1954                if let Some(pm) = pm {
1955                    pm.lock().slpbtn_evt();
1956                    VmResponse::Ok
1957                } else {
1958                    error!("{:#?} not supported", *self);
1959                    VmResponse::Err(SysError::new(ENOTSUP))
1960                }
1961            }
1962            VmRequest::Rtc { clear_evt } => {
1963                if let Some(pm) = pm.as_ref() {
1964                    match clear_evt.try_clone() {
1965                        Ok(clear_evt) => {
1966                            // RTC event will asynchronously trigger wakeup.
1967                            pm.lock().rtc_evt(clear_evt);
1968                            VmResponse::Ok
1969                        }
1970                        Err(err) => {
1971                            error!("Error cloning clear_evt: {:?}", err);
1972                            VmResponse::Err(SysError::new(EIO))
1973                        }
1974                    }
1975                } else {
1976                    error!("{:#?} not supported", *self);
1977                    VmResponse::Err(SysError::new(ENOTSUP))
1978                }
1979            }
1980            VmRequest::SuspendVcpus => {
1981                if !force_s2idle {
1982                    kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1983                    let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
1984                        Ok(state) => state,
1985                        Err(e) => {
1986                            error!("failed to get vcpu state: {e}");
1987                            return VmResponse::Err(SysError::new(EIO));
1988                        }
1989                    };
1990                    if current_mode != VmRunMode::Suspending {
1991                        error!("vCPUs failed to all suspend.");
1992                        return VmResponse::Err(SysError::new(EIO));
1993                    }
1994                }
1995                VmResponse::Ok
1996            }
1997            VmRequest::ResumeVcpus => {
1998                if let Err(e) = device_control_tube.send(&DeviceControlCommand::GetDevicesState) {
1999                    error!("failed to send GetDevicesState: {}", e);
2000                    return VmResponse::Err(SysError::new(EIO));
2001                }
2002                let devices_state = match device_control_tube.recv() {
2003                    Ok(VmResponse::DevicesState(state)) => state,
2004                    Ok(resp) => {
2005                        error!("failed to get devices state. Unexpected behavior: {}", resp);
2006                        return VmResponse::Err(SysError::new(EINVAL));
2007                    }
2008                    Err(e) => {
2009                        error!("failed to get devices state. Unexpected behavior: {}", e);
2010                        return VmResponse::Err(SysError::new(EINVAL));
2011                    }
2012                };
2013                if let DevicesState::Sleep = devices_state {
2014                    error!("Trying to wake Vcpus while Devices are asleep. Did you mean to use `crosvm resume --full`?");
2015                    return VmResponse::Err(SysError::new(EINVAL));
2016                }
2017
2018                if force_s2idle {
2019                    // During resume also emulate powerbtn event which will allow to wakeup fully
2020                    // suspended guest.
2021                    if let Some(pm) = pm {
2022                        pm.lock().pwrbtn_evt();
2023                    } else {
2024                        error!("triggering power btn during resume not supported");
2025                        return VmResponse::Err(SysError::new(ENOTSUP));
2026                    }
2027                }
2028
2029                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2030                VmResponse::Ok
2031            }
2032            VmRequest::Swap(SwapCommand::Enable) => {
2033                #[cfg(feature = "swap")]
2034                if let Some(swap_controller) = swap_controller {
2035                    // Suspend all vcpus and devices while vmm-swap is enabling (move the guest
2036                    // memory contents to the staging memory) to guarantee no processes other than
2037                    // the swap monitor process access the guest memory.
2038                    let _vcpu_guard = match VcpuSuspendGuard::new(&kick_vcpus, vcpu_size) {
2039                        Ok(guard) => guard,
2040                        Err(e) => {
2041                            error!("failed to suspend vcpus: {:?}", e);
2042                            return VmResponse::Err(SysError::new(EINVAL));
2043                        }
2044                    };
2045                    // TODO(b/253386409): Use `devices::Suspendable::sleep()` instead of sending
2046                    // `SIGSTOP` signal.
2047                    let _devices_guard = match swap_controller.suspend_devices() {
2048                        Ok(guard) => guard,
2049                        Err(e) => {
2050                            error!("failed to suspend devices: {:?}", e);
2051                            return VmResponse::Err(SysError::new(EINVAL));
2052                        }
2053                    };
2054
2055                    return match swap_controller.enable() {
2056                        Ok(()) => VmResponse::Ok,
2057                        Err(e) => {
2058                            error!("swap enable failed: {}", e);
2059                            VmResponse::Err(SysError::new(EINVAL))
2060                        }
2061                    };
2062                }
2063                VmResponse::Err(SysError::new(ENOTSUP))
2064            }
2065            VmRequest::Swap(SwapCommand::Trim) => {
2066                #[cfg(feature = "swap")]
2067                if let Some(swap_controller) = swap_controller {
2068                    return match swap_controller.trim() {
2069                        Ok(()) => VmResponse::Ok,
2070                        Err(e) => {
2071                            error!("swap trim failed: {}", e);
2072                            VmResponse::Err(SysError::new(EINVAL))
2073                        }
2074                    };
2075                }
2076                VmResponse::Err(SysError::new(ENOTSUP))
2077            }
2078            VmRequest::Swap(SwapCommand::SwapOut) => {
2079                #[cfg(feature = "swap")]
2080                if let Some(swap_controller) = swap_controller {
2081                    return match swap_controller.swap_out() {
2082                        Ok(()) => VmResponse::Ok,
2083                        Err(e) => {
2084                            error!("swap out failed: {}", e);
2085                            VmResponse::Err(SysError::new(EINVAL))
2086                        }
2087                    };
2088                }
2089                VmResponse::Err(SysError::new(ENOTSUP))
2090            }
2091            VmRequest::Swap(SwapCommand::Disable {
2092                #[cfg(feature = "swap")]
2093                slow_file_cleanup,
2094                ..
2095            }) => {
2096                #[cfg(feature = "swap")]
2097                if let Some(swap_controller) = swap_controller {
2098                    return match swap_controller.disable(*slow_file_cleanup) {
2099                        Ok(()) => VmResponse::Ok,
2100                        Err(e) => {
2101                            error!("swap disable failed: {}", e);
2102                            VmResponse::Err(SysError::new(EINVAL))
2103                        }
2104                    };
2105                }
2106                VmResponse::Err(SysError::new(ENOTSUP))
2107            }
2108            VmRequest::Swap(SwapCommand::Status) => {
2109                #[cfg(feature = "swap")]
2110                if let Some(swap_controller) = swap_controller {
2111                    return match swap_controller.status() {
2112                        Ok(status) => VmResponse::SwapStatus(status),
2113                        Err(e) => {
2114                            error!("swap status failed: {}", e);
2115                            VmResponse::Err(SysError::new(EINVAL))
2116                        }
2117                    };
2118                }
2119                VmResponse::Err(SysError::new(ENOTSUP))
2120            }
2121            VmRequest::SuspendVm => {
2122                info!("Starting crosvm suspend");
2123                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
2124                let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
2125                    Ok(state) => state,
2126                    Err(e) => {
2127                        error!("failed to get vcpu state: {e}");
2128                        return VmResponse::Err(SysError::new(EIO));
2129                    }
2130                };
2131                if current_mode != VmRunMode::Suspending {
2132                    error!("vCPUs failed to all suspend.");
2133                    return VmResponse::Err(SysError::new(EIO));
2134                }
2135                // Snapshot the pvclock ASAP after stopping vCPUs.
2136                if vm.check_capability(VmCap::PvClock) {
2137                    if suspended_pvclock_state.is_none() {
2138                        *suspended_pvclock_state = Some(match vm.get_pvclock() {
2139                            Ok(x) => x,
2140                            Err(e) => {
2141                                error!("suspend_pvclock failed: {e:?}");
2142                                return VmResponse::Err(SysError::new(EIO));
2143                            }
2144                        });
2145                    }
2146                }
2147                if let Err(e) = device_control_tube
2148                    .send(&DeviceControlCommand::SleepDevices)
2149                    .context("send command to devices control socket")
2150                {
2151                    error!("{:?}", e);
2152                    return VmResponse::Err(SysError::new(EIO));
2153                };
2154                match device_control_tube
2155                    .recv()
2156                    .context("receive from devices control socket")
2157                {
2158                    Ok(VmResponse::Ok) => {
2159                        info!("Finished crosvm suspend successfully");
2160                        VmResponse::Ok
2161                    }
2162                    Ok(resp) => {
2163                        error!("device sleep failed: {}", resp);
2164                        VmResponse::Err(SysError::new(EIO))
2165                    }
2166                    Err(e) => {
2167                        error!("receive from devices control socket: {:?}", e);
2168                        VmResponse::Err(SysError::new(EIO))
2169                    }
2170                }
2171            }
2172            VmRequest::ResumeVm => {
2173                info!("Starting crosvm resume");
2174                if let Err(e) = device_control_tube
2175                    .send(&DeviceControlCommand::WakeDevices)
2176                    .context("send command to devices control socket")
2177                {
2178                    error!("{:?}", e);
2179                    return VmResponse::Err(SysError::new(EIO));
2180                };
2181                match device_control_tube
2182                    .recv()
2183                    .context("receive from devices control socket")
2184                {
2185                    Ok(VmResponse::Ok) => {
2186                        info!("Finished crosvm resume successfully");
2187                    }
2188                    Ok(resp) => {
2189                        error!("device wake failed: {}", resp);
2190                        return VmResponse::Err(SysError::new(EIO));
2191                    }
2192                    Err(e) => {
2193                        error!("receive from devices control socket: {:?}", e);
2194                        return VmResponse::Err(SysError::new(EIO));
2195                    }
2196                }
2197                // Resume the pvclock as late as possible before starting vCPUs.
2198                if vm.check_capability(VmCap::PvClock) {
2199                    // If None, then we aren't suspended, which is a valid case.
2200                    if let Some(x) = suspended_pvclock_state {
2201                        if let Err(e) = vm.set_pvclock(x) {
2202                            error!("resume_pvclock failed: {e:?}");
2203                            return VmResponse::Err(SysError::new(EIO));
2204                        }
2205                    }
2206                }
2207                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2208                VmResponse::Ok
2209            }
2210            VmRequest::Gpe { gpe, clear_evt } => {
2211                if let Some(pm) = pm.as_ref() {
2212                    match clear_evt.as_ref().map(|e| e.try_clone()).transpose() {
2213                        Ok(clear_evt) => {
2214                            pm.lock().gpe_evt(*gpe, clear_evt);
2215                            VmResponse::Ok
2216                        }
2217                        Err(err) => {
2218                            error!("Error cloning clear_evt: {:?}", err);
2219                            VmResponse::Err(SysError::new(EIO))
2220                        }
2221                    }
2222                } else {
2223                    error!("{:#?} not supported", *self);
2224                    VmResponse::Err(SysError::new(ENOTSUP))
2225                }
2226            }
2227            VmRequest::PciPme(requester_id) => {
2228                if let Some(pm) = pm.as_ref() {
2229                    pm.lock().pme_evt(*requester_id);
2230                    VmResponse::Ok
2231                } else {
2232                    error!("{:#?} not supported", *self);
2233                    VmResponse::Err(SysError::new(ENOTSUP))
2234                }
2235            }
2236            VmRequest::MakeRT => {
2237                kick_vcpus(VcpuControl::MakeRT);
2238                VmResponse::Ok
2239            }
2240            #[cfg(feature = "balloon")]
2241            VmRequest::BalloonCommand(_) => unreachable!("Should be handled with BalloonTube"),
2242            VmRequest::DiskCommand {
2243                disk_index,
2244                ref command,
2245            } => match &disk_host_tubes.get(*disk_index) {
2246                Some(tube) => handle_disk_command(command, tube),
2247                None => VmResponse::Err(SysError::new(ENODEV)),
2248            },
2249            VmRequest::GpuCommand(ref cmd) => match gpu_control_tube {
2250                Some(gpu_control) => {
2251                    let res = gpu_control.send(cmd);
2252                    if let Err(e) = res {
2253                        error!("fail to send command to gpu control socket: {}", e);
2254                        return VmResponse::Err(SysError::new(EIO));
2255                    }
2256                    match gpu_control.recv() {
2257                        Ok(response) => VmResponse::GpuResponse(response),
2258                        Err(e) => {
2259                            error!("fail to recv command from gpu control socket: {}", e);
2260                            VmResponse::Err(SysError::new(EIO))
2261                        }
2262                    }
2263                }
2264                None => {
2265                    error!("gpu control is not enabled in crosvm");
2266                    VmResponse::Err(SysError::new(EIO))
2267                }
2268            },
2269            VmRequest::UsbCommand(ref cmd) => {
2270                let usb_control_tube = match usb_control_tube {
2271                    Some(t) => t,
2272                    None => {
2273                        error!("attempted to execute USB request without control tube");
2274                        return VmResponse::Err(SysError::new(ENODEV));
2275                    }
2276                };
2277                let res = usb_control_tube.send(cmd);
2278                if let Err(e) = res {
2279                    error!("fail to send command to usb control socket: {}", e);
2280                    return VmResponse::Err(SysError::new(EIO));
2281                }
2282                match usb_control_tube.recv() {
2283                    Ok(response) => VmResponse::UsbResponse(response),
2284                    Err(e) => {
2285                        error!("fail to recv command from usb control socket: {}", e);
2286                        VmResponse::Err(SysError::new(EIO))
2287                    }
2288                }
2289            }
2290            VmRequest::BatCommand(type_, ref cmd) => {
2291                match bat_control {
2292                    Some(battery) => {
2293                        if battery.type_ != *type_ {
2294                            error!("ignored battery command due to battery type: expected {:?}, got {:?}", battery.type_, type_);
2295                            return VmResponse::Err(SysError::new(EINVAL));
2296                        }
2297
2298                        let res = battery.control_tube.send(cmd);
2299                        if let Err(e) = res {
2300                            error!("fail to send command to bat control socket: {}", e);
2301                            return VmResponse::Err(SysError::new(EIO));
2302                        }
2303
2304                        match battery.control_tube.recv() {
2305                            Ok(response) => VmResponse::BatResponse(response),
2306                            Err(e) => {
2307                                error!("fail to recv command from bat control socket: {}", e);
2308                                VmResponse::Err(SysError::new(EIO))
2309                            }
2310                        }
2311                    }
2312                    None => VmResponse::BatResponse(BatControlResult::NoBatDevice),
2313                }
2314            }
2315            #[cfg(feature = "audio")]
2316            VmRequest::SndCommand(ref cmd) => match cmd {
2317                SndControlCommand::MuteAll(muted) => {
2318                    for tube in snd_host_tubes {
2319                        let res = tube.send(&SndControlCommand::MuteAll(*muted));
2320                        if let Err(e) = res {
2321                            error!("fail to send command to snd control socket: {}", e);
2322                            return VmResponse::Err(SysError::new(EIO));
2323                        }
2324
2325                        match tube.recv() {
2326                            Ok(VmResponse::Ok) => {
2327                                debug!("device is successfully muted");
2328                            }
2329                            Ok(resp) => {
2330                                error!("mute failed: {}", resp);
2331                                return VmResponse::ErrString("fail to mute the device".to_owned());
2332                            }
2333                            Err(e) => return VmResponse::Err(SysError::new(EIO)),
2334                        }
2335                    }
2336                    VmResponse::Ok
2337                }
2338            },
2339            VmRequest::HotPlugVfioCommand { device: _, add: _ } => VmResponse::Ok,
2340            #[cfg(feature = "pci-hotplug")]
2341            VmRequest::HotPlugNetCommand(ref _net_cmd) => {
2342                VmResponse::ErrString("hot plug not supported".to_owned())
2343            }
2344            VmRequest::Snapshot(SnapshotCommand::Take {
2345                ref snapshot_path,
2346                compress_memory,
2347                encrypt,
2348            }) => {
2349                info!("Starting crosvm snapshot");
2350                match do_snapshot(
2351                    snapshot_path.to_path_buf(),
2352                    kick_vcpus,
2353                    irq_handler_control,
2354                    device_control_tube,
2355                    vcpu_size,
2356                    snapshot_irqchip,
2357                    *compress_memory,
2358                    *encrypt,
2359                    suspended_pvclock_state,
2360                    vm,
2361                ) {
2362                    Ok(()) => {
2363                        info!("Finished crosvm snapshot successfully");
2364                        VmResponse::Ok
2365                    }
2366                    Err(e) => {
2367                        error!("failed to handle snapshot: {:?}", e);
2368                        VmResponse::Err(SysError::new(EIO))
2369                    }
2370                }
2371            }
2372            VmRequest::RegisterListener {
2373                socket_addr: _,
2374                event: _,
2375            } => VmResponse::Ok,
2376            VmRequest::UnregisterListener {
2377                socket_addr: _,
2378                event: _,
2379            } => VmResponse::Ok,
2380            VmRequest::Unregister { socket_addr: _ } => VmResponse::Ok,
2381            VmRequest::VcpuPidTid => unreachable!(),
2382            VmRequest::Throttle(_, _) => unreachable!(),
2383            VmRequest::GetVmDescriptor => {
2384                let vm_fd = match vm.try_clone_descriptor() {
2385                    Ok(vm_fd) => vm_fd,
2386                    Err(e) => {
2387                        error!("failed to get vm_fd: {:?}", e);
2388                        return VmResponse::Err(e);
2389                    }
2390                };
2391                VmResponse::VmDescriptor {
2392                    hypervisor: vm.hypervisor_kind(),
2393                    vm_fd,
2394                }
2395            }
2396            VmRequest::RegisterMemory { .. } => unreachable!(),
2397            VmRequest::UnregisterMemory { .. } => unreachable!(),
2398        }
2399    }
2400}
2401
2402/// Snapshot the VM to file at `snapshot_path`
2403fn do_snapshot(
2404    snapshot_path: PathBuf,
2405    kick_vcpus: impl Fn(VcpuControl),
2406    irq_handler_control: &Tube,
2407    device_control_tube: &Tube,
2408    vcpu_size: usize,
2409    snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
2410    compress_memory: bool,
2411    encrypt: bool,
2412    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2413    vm: &dyn Vm,
2414) -> anyhow::Result<()> {
2415    let snapshot_start = Instant::now();
2416
2417    let _vcpu_guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size)?;
2418    let _device_guard = DeviceSleepGuard::new(device_control_tube)?;
2419
2420    // We want to flush all pending IRQs to the interrupt controller. There are two cases:
2421    //
2422    // MSIs: these are directly delivered to the interrupt controller.
2423    // We must verify the handler thread cycles once to deliver these interrupts.
2424    //
2425    // Legacy interrupts: in the case of a split IRQ chip, these interrupts may
2426    // flow through the userspace IOAPIC. If the hypervisor does not support
2427    // irqfds (e.g. WHPX), a single iteration will only flush the IRQ to the
2428    // IOAPIC. The underlying MSI will be asserted at this point, but if the
2429    // IRQ handler doesn't run another iteration, it won't be delivered to the
2430    // interrupt controller. This is why we cycle the handler thread twice (doing so
2431    // ensures we process the underlying MSI).
2432    //
2433    // We can handle both of these cases by iterating until there are no tokens
2434    // serviced on the requested iteration. Note that in the legacy case, this
2435    // ensures at least two iterations.
2436    //
2437    // Note: within CrosVM, *all* interrupts are eventually converted into the
2438    // same mechanicism that MSIs use. This is why we say "underlying" MSI for
2439    // a legacy IRQ.
2440    {
2441        let mut flush_attempts = 0;
2442        loop {
2443            irq_handler_control
2444                .send(&IrqHandlerRequest::WakeAndNotifyIteration)
2445                .context("failed to send flush command to IRQ handler thread")?;
2446            let resp = irq_handler_control
2447                .recv()
2448                .context("failed to recv flush response from IRQ handler thread")?;
2449            match resp {
2450                IrqHandlerResponse::HandlerIterationComplete(tokens_serviced) => {
2451                    if tokens_serviced == 0 {
2452                        break;
2453                    }
2454                }
2455                _ => bail!("received unexpected reply from IRQ handler: {:?}", resp),
2456            }
2457            flush_attempts += 1;
2458            if flush_attempts > EXPECTED_MAX_IRQ_FLUSH_ITERATIONS {
2459                warn!(
2460                    "flushing IRQs for snapshot may be stalled after iteration {}, expected <= {}
2461                      iterations",
2462                    flush_attempts, EXPECTED_MAX_IRQ_FLUSH_ITERATIONS
2463                );
2464            }
2465        }
2466        info!("flushed IRQs in {} iterations", flush_attempts);
2467    }
2468    let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
2469
2470    // Snapshot hypervisor's paravirtualized clock.
2471    snapshot_writer.write_fragment("pvclock", &AnySnapshot::to_any(suspended_pvclock_state)?)?;
2472
2473    // Snapshot Vcpus
2474    info!("VCPUs snapshotting...");
2475    let (send_chan, recv_chan) = mpsc::channel();
2476    kick_vcpus(VcpuControl::Snapshot(
2477        snapshot_writer.add_namespace("vcpu")?,
2478        send_chan,
2479    ));
2480    // Validate all Vcpus snapshot successfully
2481    for _ in 0..vcpu_size {
2482        recv_chan
2483            .recv()
2484            .context("Failed to recv Vcpu snapshot response")?
2485            .context("Failed to snapshot Vcpu")?;
2486    }
2487    info!("VCPUs snapshotted.");
2488
2489    // Snapshot irqchip
2490    info!("Snapshotting irqchip...");
2491    let irqchip_snap = snapshot_irqchip()?;
2492    snapshot_writer
2493        .write_fragment("irqchip", &irqchip_snap)
2494        .context("Failed to write irqchip state")?;
2495    info!("Snapshotted irqchip.");
2496
2497    // Snapshot memory
2498    {
2499        let mem_snap_start = Instant::now();
2500        // Use 64MB chunks when writing the memory snapshot (if encryption is used).
2501        const MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES: usize = 1024 * 1024 * 64;
2502        // SAFETY:
2503        // VM & devices are stopped.
2504        let guest_memory_metadata = unsafe {
2505            vm.get_memory()
2506                .snapshot(
2507                    &mut snapshot_writer.raw_fragment_with_chunk_size(
2508                        "mem",
2509                        MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES,
2510                    )?,
2511                    compress_memory,
2512                )
2513                .context("failed to snapshot memory")?
2514        };
2515        snapshot_writer.write_fragment("mem_metadata", &guest_memory_metadata)?;
2516
2517        let mem_snap_duration_ms = mem_snap_start.elapsed().as_millis();
2518        info!(
2519            "snapshot: memory snapshotted {}MB in {}ms",
2520            vm.get_memory().memory_size() / 1024 / 1024,
2521            mem_snap_duration_ms
2522        );
2523        metrics::log_metric_with_details(
2524            metrics::MetricEventType::SnapshotSaveMemoryLatency,
2525            mem_snap_duration_ms as i64,
2526            &metrics_events::RecordDetails {},
2527        );
2528    }
2529    // Snapshot devices
2530    info!("Devices snapshotting...");
2531    device_control_tube
2532        .send(&DeviceControlCommand::SnapshotDevices { snapshot_writer })
2533        .context("send command to devices control socket")?;
2534    let resp: VmResponse = device_control_tube
2535        .recv()
2536        .context("receive from devices control socket")?;
2537    if !matches!(resp, VmResponse::Ok) {
2538        bail!("unexpected SnapshotDevices response: {resp}");
2539    }
2540    info!("Devices snapshotted.");
2541
2542    let snap_duration_ms = snapshot_start.elapsed().as_millis();
2543    info!(
2544        "snapshot: completed snapshot in {}ms; VM mem size: {}MB",
2545        snap_duration_ms,
2546        vm.get_memory().memory_size() / 1024 / 1024,
2547    );
2548    metrics::log_metric_with_details(
2549        metrics::MetricEventType::SnapshotSaveOverallLatency,
2550        snap_duration_ms as i64,
2551        &metrics_events::RecordDetails {},
2552    );
2553    Ok(())
2554}
2555
2556/// Restore the VM to the snapshot at `restore_path`.
2557///
2558/// Same as `VmRequest::execute` with a `VmRequest::Restore`. Exposed as a separate function
2559/// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
2560pub fn do_restore(
2561    restore_path: &Path,
2562    kick_vcpus: impl Fn(VcpuControl),
2563    kick_vcpu: impl Fn(VcpuControl, usize),
2564    irq_handler_control: &Tube,
2565    device_control_tube: &Tube,
2566    vcpu_size: usize,
2567    mut restore_irqchip: impl FnMut(AnySnapshot) -> anyhow::Result<()>,
2568    require_encrypted: bool,
2569    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2570    vm: &dyn Vm,
2571) -> anyhow::Result<()> {
2572    let restore_start = Instant::now();
2573    let _guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size);
2574    let _devices_guard = DeviceSleepGuard::new(device_control_tube)?;
2575
2576    let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
2577
2578    // Restore hypervisor's paravirtualized clock.
2579    *suspended_pvclock_state = snapshot_reader.read_fragment("pvclock")?;
2580
2581    // Restore IrqChip
2582    let irq_snapshot: AnySnapshot = snapshot_reader.read_fragment("irqchip")?;
2583    restore_irqchip(irq_snapshot)?;
2584
2585    // Restore Vcpu(s)
2586    let vcpu_snapshot_reader = snapshot_reader.namespace("vcpu")?;
2587    let vcpu_snapshot_count = vcpu_snapshot_reader.list_fragments()?.len();
2588    if vcpu_snapshot_count != vcpu_size {
2589        bail!(
2590            "bad cpu count in snapshot: expected={} got={}",
2591            vcpu_size,
2592            vcpu_snapshot_count,
2593        );
2594    }
2595    #[cfg(target_arch = "x86_64")]
2596    let host_tsc_reference_moment = {
2597        // SAFETY: rdtsc takes no arguments.
2598        unsafe { _rdtsc() }
2599    };
2600    let (send_chan, recv_chan) = mpsc::channel();
2601    for vcpu_id in 0..vcpu_size {
2602        kick_vcpu(
2603            VcpuControl::Restore(VcpuRestoreRequest {
2604                result_sender: send_chan.clone(),
2605                snapshot_reader: vcpu_snapshot_reader.clone(),
2606                #[cfg(target_arch = "x86_64")]
2607                host_tsc_reference_moment,
2608            }),
2609            vcpu_id,
2610        );
2611    }
2612    for _ in 0..vcpu_size {
2613        recv_chan
2614            .recv()
2615            .context("Failed to recv restore response")?
2616            .context("Failed to restore vcpu")?;
2617    }
2618
2619    // Restore Memory
2620    {
2621        let mem_restore_start = Instant::now();
2622        let guest_memory_metadata = snapshot_reader.read_fragment("mem_metadata")?;
2623        // SAFETY:
2624        // VM & devices are stopped.
2625        unsafe {
2626            vm.get_memory().restore(
2627                guest_memory_metadata,
2628                &mut snapshot_reader.raw_fragment("mem")?,
2629            )?
2630        };
2631        let mem_restore_duration_ms = mem_restore_start.elapsed().as_millis();
2632        info!(
2633            "snapshot: memory restored {}MB in {}ms",
2634            vm.get_memory().memory_size() / 1024 / 1024,
2635            mem_restore_duration_ms
2636        );
2637        metrics::log_metric_with_details(
2638            metrics::MetricEventType::SnapshotRestoreMemoryLatency,
2639            mem_restore_duration_ms as i64,
2640            &metrics_events::RecordDetails {},
2641        );
2642    }
2643    // Restore devices
2644    device_control_tube
2645        .send(&DeviceControlCommand::RestoreDevices {
2646            snapshot_reader: snapshot_reader.clone(),
2647        })
2648        .context("send restore devices command to devices control socket")?;
2649    let resp: VmResponse = device_control_tube
2650        .recv()
2651        .context("receive from devices control socket")?;
2652    if !matches!(resp, VmResponse::Ok) {
2653        bail!("unexpected RestoreDevices response: {resp}");
2654    }
2655
2656    // refresh the IRQ tokens.
2657    {
2658        irq_handler_control
2659            .send(&IrqHandlerRequest::RefreshIrqEventTokens)
2660            .context("failed to send refresh irq event token command to IRQ handler thread")?;
2661        let resp: IrqHandlerResponse = irq_handler_control
2662            .recv()
2663            .context("failed to recv refresh response from IRQ handler thread")?;
2664        if !matches!(resp, IrqHandlerResponse::IrqEventTokenRefreshComplete) {
2665            bail!(
2666                "received unexpected reply from IRQ handler thread: {:?}",
2667                resp
2668            );
2669        }
2670    }
2671
2672    let restore_duration_ms = restore_start.elapsed().as_millis();
2673    info!(
2674        "snapshot: completed restore in {}ms; mem size: {}",
2675        restore_duration_ms,
2676        vm.get_memory().memory_size(),
2677    );
2678
2679    metrics::log_metric_with_details(
2680        metrics::MetricEventType::SnapshotRestoreOverallLatency,
2681        restore_duration_ms as i64,
2682        &metrics_events::RecordDetails {},
2683    );
2684    Ok(())
2685}
2686
/// Alias for `hypervisor::HypervisorKind`, used by `VmResponse::VmDescriptor` to report which
/// hypervisor backs the VM.
pub type HypervisorKind = hypervisor::HypervisorKind;
2688
/// Indication of success or failure of a `VmRequest`.
///
/// Success is usually indicated by `VmResponse::Ok` unless there is data associated with the
/// response.
#[derive(Serialize, Deserialize, Debug)]
#[must_use]
pub enum VmResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution, reported as a `SysError`.
    Err(SysError),
    /// Indicates the request encountered some error during execution, reported as a message
    /// string.
    ErrString(String),
    /// The memory was registered into guest address space in memory slot number `slot`.
    RegisterMemory { slot: u32 },
    /// Variant of the register memory but with region_id.
    RegisterMemory2 { region_id: u64 },
    /// Results of balloon control commands.
    #[cfg(feature = "balloon")]
    BalloonStats {
        stats: balloon_control::BalloonStats,
        balloon_actual: u64,
    },
    /// Results of balloon WS-R command
    #[cfg(feature = "balloon")]
    BalloonWS {
        ws: balloon_control::BalloonWS,
        balloon_actual: u64,
    },
    /// Results of PCI hot plug
    #[cfg(feature = "pci-hotplug")]
    PciHotPlugResponse { bus: u8 },
    /// Results of usb control commands.
    UsbResponse(UsbControlResult),
    /// Results of gpu control commands.
    GpuResponse(GpuControlResult),
    /// Results of battery control commands.
    BatResponse(BatControlResult),
    /// Results of swap status command.
    SwapStatus(SwapStatus),
    /// Gets the state of Devices (sleep/wake)
    DevicesState(DevicesState),
    /// Map of the Vcpu PID/TIDs
    VcpuPidTidResponse {
        // NOTE(review): presumably keyed by vCPU index with `(pid, tid)` values — confirm
        // against the code that builds this map.
        pid_tid_map: BTreeMap<usize, (u32, u32)>,
    },
    /// Reply to `VmRequest::GetVmDescriptor`: the kind of hypervisor in use together with a
    /// cloned descriptor of the VM.
    VmDescriptor {
        hypervisor: HypervisorKind,
        vm_fd: SafeDescriptor,
    },
}
2739
2740impl Display for VmResponse {
2741    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2742        use self::VmResponse::*;
2743
2744        match self {
2745            Ok => write!(f, "ok"),
2746            Err(e) => write!(f, "error: {e}"),
2747            ErrString(e) => write!(f, "error: {e}"),
2748            RegisterMemory { slot } => write!(f, "memory registered in slot {slot}"),
2749            RegisterMemory2 { region_id } => {
2750                write!(f, "memory registered in region id {region_id}")
2751            }
2752            #[cfg(feature = "balloon")]
2753            VmResponse::BalloonStats {
2754                stats,
2755                balloon_actual,
2756            } => {
2757                write!(
2758                    f,
2759                    "stats: {}\nballoon_actual: {}",
2760                    serde_json::to_string_pretty(&stats)
2761                        .unwrap_or_else(|_| "invalid_response".to_string()),
2762                    balloon_actual
2763                )
2764            }
2765            #[cfg(feature = "balloon")]
2766            VmResponse::BalloonWS { ws, balloon_actual } => {
2767                write!(
2768                    f,
2769                    "ws: {}, balloon_actual: {}",
2770                    serde_json::to_string_pretty(&ws)
2771                        .unwrap_or_else(|_| "invalid_response".to_string()),
2772                    balloon_actual,
2773                )
2774            }
2775            UsbResponse(result) => write!(f, "usb control request get result {result:?}"),
2776            #[cfg(feature = "pci-hotplug")]
2777            PciHotPlugResponse { bus } => write!(f, "pci hotplug bus {bus:?}"),
2778            GpuResponse(result) => write!(f, "gpu control request result {result:?}"),
2779            BatResponse(result) => write!(f, "{result}"),
2780            SwapStatus(status) => {
2781                write!(
2782                    f,
2783                    "{}",
2784                    serde_json::to_string(&status)
2785                        .unwrap_or_else(|_| "invalid_response".to_string()),
2786                )
2787            }
2788            DevicesState(status) => write!(f, "devices status: {status:?}"),
2789            VcpuPidTidResponse { pid_tid_map } => write!(f, "vcpu pid tid map: {pid_tid_map:?}"),
2790            VmDescriptor { hypervisor, vm_fd } => {
2791                write!(f, "hypervisor: {hypervisor:?}, vm_fd: {vm_fd:?}")
2792            }
2793        }
2794    }
2795}
2796
/// Enum that allows remote control of a wait context (used between the Windows GpuDisplay & the
/// GPU worker).
#[derive(Serialize, Deserialize)]
pub enum ModifyWaitContext {
    // NOTE(review): presumably asks the receiver to add this descriptor to its wait context —
    // confirm against the code that handles this message.
    Add(#[serde(with = "with_as_descriptor")] Descriptor),
}
2803
/// Errors for vfio-related virtio-iommu commands; the `#[error]` attributes carry the
/// user-visible messages.
#[sorted]
#[derive(Error, Debug)]
pub enum VirtioIOMMUVfioError {
    /// Reported as "socket failed".
    #[error("socket failed")]
    SocketFailed,
    /// A `VirtioIOMMUResponse` other than the expected one was received.
    #[error("unexpected response: {0}")]
    UnexpectedResponse(VirtioIOMMUResponse),
    /// The named command is not recognized.
    #[error("unknown command: `{0}`")]
    UnknownCommand(String),
    /// Wraps a `VirtioIOMMUVfioResult` and displays it verbatim.
    #[error("{0}")]
    VfioControl(VirtioIOMMUVfioResult),
}
2816
/// Vfio-related commands carried by `VirtioIOMMURequest::VfioCommand`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioCommand {
    /// Add the vfio device attached to virtio-iommu.
    VfioDeviceAdd {
        endpoint_addr: u32,
        wrapper_id: u32,
        // Serialized as a descriptor rather than by value.
        #[serde(with = "with_as_descriptor")]
        container: File,
    },
    /// Delete the vfio device attached to virtio-iommu.
    VfioDeviceDel {
        endpoint_addr: u32,
    },
    /// Map a dma-buf into vfio iommu table
    VfioDmabufMap {
        region_id: VmMemoryRegionId,
        gpa: u64,
        size: u64,
        dma_buf: SafeDescriptor,
    },
    /// Unmap a dma-buf from vfio iommu table
    VfioDmabufUnmap(VmMemoryRegionId),
}
2840
/// Outcome of a vfio-related virtio-iommu command.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioResult {
    /// The command succeeded.
    Ok,
    /// Not in the PCI ranges of virtio-iommu.
    NotInPCIRanges,
    /// No vfio container is available.
    NoAvailableContainer,
    /// No such vfio device.
    NoSuchDevice,
    /// No such mapped dma-buf.
    NoSuchMappedDmabuf,
    /// The parameters were invalid.
    InvalidParam,
}
2850
2851impl Display for VirtioIOMMUVfioResult {
2852    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2853        use self::VirtioIOMMUVfioResult::*;
2854
2855        match self {
2856            Ok => write!(f, "successfully"),
2857            NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
2858            NoAvailableContainer => write!(f, "no available vfio container"),
2859            NoSuchDevice => write!(f, "no such a vfio device"),
2860            NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
2861            InvalidParam => write!(f, "invalid parameters"),
2862        }
2863    }
2864}
2865
/// A request to the virtio-iommu process to perform some operations.
///
/// Unless otherwise noted, each request should expect a `VirtioIOMMUResponse::Ok` to be received on
/// success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMURequest {
    /// Command for vfio related operations; see `VirtioIOMMUVfioCommand` for the individual
    /// commands.
    VfioCommand(VirtioIOMMUVfioCommand),
}
2875
/// Indication of success or failure of a `VirtioIOMMURequest`.
///
/// Success is usually indicated by `VirtioIOMMUResponse::Ok` unless there is data associated with
/// the response.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution.
    Err(SysError),
    /// Results for Vfio commands.
    VfioResponse(VirtioIOMMUVfioResult),
}
2889
2890impl Display for VirtioIOMMUResponse {
2891    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2892        use self::VirtioIOMMUResponse::*;
2893        match self {
2894            Ok => write!(f, "ok"),
2895            Err(e) => write!(f, "error: {e}"),
2896            VfioResponse(result) => write!(
2897                f,
2898                "The vfio-related virtio-iommu request got result: {result:?}"
2899            ),
2900        }
2901    }
2902}
2903
2904/// Send VirtioIOMMURequest without waiting for the response
2905pub fn virtio_iommu_request_async(
2906    iommu_control_tube: &Tube,
2907    req: &VirtioIOMMURequest,
2908) -> VirtioIOMMUResponse {
2909    match iommu_control_tube.send(&req) {
2910        Ok(_) => VirtioIOMMUResponse::Ok,
2911        Err(e) => {
2912            error!("virtio-iommu socket send failed: {:?}", e);
2913            VirtioIOMMUResponse::Err(SysError::last())
2914        }
2915    }
2916}
2917
/// Result type returned by `virtio_iommu_request`.
///
/// NOTE(review): `virtio_iommu_request` always returns `Ok(..)` and reports failures as
/// `VirtioIOMMUResponse::Err`, so the `Err(())` case appears unused there.
pub type VirtioIOMMURequestResult = std::result::Result<VirtioIOMMUResponse, ()>;
2919
2920/// Send VirtioIOMMURequest and wait to get the response
2921pub fn virtio_iommu_request(
2922    iommu_control_tube: &Tube,
2923    req: &VirtioIOMMURequest,
2924) -> VirtioIOMMURequestResult {
2925    let response = match virtio_iommu_request_async(iommu_control_tube, req) {
2926        VirtioIOMMUResponse::Ok => match iommu_control_tube.recv() {
2927            Ok(response) => response,
2928            Err(e) => {
2929                error!("virtio-iommu socket recv failed: {:?}", e);
2930                VirtioIOMMUResponse::Err(SysError::last())
2931            }
2932        },
2933        resp => resp,
2934    };
2935    Ok(response)
2936}
2937
#[cfg(test)]
mod tests {
    use anyhow::anyhow;

    use super::*;

    /// A `VmMemoryResponseError` built from an error with two layers of context must survive a
    /// JSON round trip with its top-level message and its full cause chain intact.
    #[test]
    fn vm_memory_response_error_should_serialize_and_deserialize_correctly() {
        // Flattens the error's cause chain into the messages of each link.
        let chain_messages = |err: &VmMemoryResponseError| -> Vec<String> {
            err.0.chain().map(ToString::to_string).collect()
        };

        let original: VmMemoryResponseError = anyhow!("root cause")
            .context("context 1")
            .context("context 2")
            .into();

        let json = serde_json::to_vec(&original).expect("should serialize to json successfully");
        let round_tripped = serde_json::from_slice::<VmMemoryResponseError>(&json)
            .expect("should deserialize from json successfully");

        assert_eq!(original.0.to_string(), round_tripped.0.to_string());
        assert_eq!(chain_messages(&original), chain_messages(&round_tripped));
    }

    /// A flattened error that carries no messages at all must be rejected on deserialization.
    #[test]
    fn vm_memory_response_error_deserialization_should_handle_malformat_correctly() {
        let empty_flat_error = FlatVmMemoryResponseError(Vec::new());
        let json =
            serde_json::to_vec(&empty_flat_error).expect("should serialize to json successfully");
        let outcome = serde_json::from_slice::<VmMemoryResponseError>(&json);
        outcome.expect_err("deserialize with 0 error messages should fail");
    }
}