vm_control/
lib.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Handles IPC for controlling the main VM process.
6//!
7//! The VM Control IPC protocol is synchronous, meaning that each `VmRequest` sent over a connection
8//! will receive a `VmResponse` for that request next time data is received over that connection.
9//!
10//! The wire message format is a little-endian C-struct of fixed size, along with a file descriptor
11//! if the request type expects one.
12
13pub mod api;
14
15mod device_id;
16pub use device_id::DeviceId;
17pub use device_id::PciId;
18pub use device_id::PlatformDeviceId;
19
20#[cfg(feature = "gdb")]
21pub mod gdb;
22#[cfg(feature = "gpu")]
23pub mod gpu;
24
25use base::debug;
26#[cfg(any(target_os = "android", target_os = "linux"))]
27use base::linux::MemoryMappingBuilderUnix;
28#[cfg(any(target_os = "android", target_os = "linux"))]
29use base::sys::call_with_extended_max_files;
30#[cfg(any(target_os = "android", target_os = "linux"))]
31use base::MemoryMappingArena;
32#[cfg(windows)]
33use base::MemoryMappingBuilderWindows;
34use hypervisor::BalloonEvent;
35use hypervisor::MemCacheType;
36use hypervisor::MemRegion;
37use snapshot::AnySnapshot;
38
39#[cfg(feature = "balloon")]
40mod balloon_tube;
41pub mod client;
42pub mod sys;
43
44#[cfg(target_arch = "x86_64")]
45use std::arch::x86_64::_rdtsc;
46use std::collections::BTreeMap;
47use std::collections::BTreeSet;
48use std::collections::HashMap;
49use std::convert::TryInto;
50use std::fmt;
51use std::fmt::Display;
52use std::fs::File;
53use std::path::Path;
54use std::path::PathBuf;
55use std::result::Result as StdResult;
56use std::str::FromStr;
57use std::sync::mpsc;
58use std::sync::Arc;
59use std::time::Instant;
60
61use anyhow::bail;
62use anyhow::Context;
63use base::error;
64use base::info;
65use base::warn;
66use base::with_as_descriptor;
67use base::AsRawDescriptor;
68use base::Descriptor;
69use base::Error as SysError;
70use base::Event;
71use base::ExternalMapping;
72use base::IntoRawDescriptor;
73use base::MappedRegion;
74use base::MemoryMappingBuilder;
75use base::MmapError;
76use base::Protection;
77use base::Result;
78use base::SafeDescriptor;
79use base::SharedMemory;
80use base::Tube;
81use hypervisor::Datamatch;
82use hypervisor::IoEventAddress;
83use hypervisor::IrqRoute;
84use hypervisor::IrqSource;
85pub use hypervisor::MemSlot;
86use hypervisor::Vm;
87use hypervisor::VmCap;
88use libc::EINVAL;
89use libc::EIO;
90use libc::ENODEV;
91use libc::ENOTSUP;
92use libc::ERANGE;
93#[cfg(feature = "registered_events")]
94use protos::registered_events;
95use remain::sorted;
96use resources::Alloc;
97use resources::SystemAllocator;
98use rutabaga_gfx::RutabagaDescriptor;
99use rutabaga_gfx::RutabagaFromRawDescriptor;
100use rutabaga_gfx::RutabagaGralloc;
101use rutabaga_gfx::RutabagaHandle;
102use rutabaga_gfx::RutabagaMappedRegion;
103use rutabaga_gfx::VulkanInfo;
104use serde::de::Error;
105use serde::Deserialize;
106use serde::Serialize;
107use snapshot::SnapshotReader;
108use snapshot::SnapshotWriter;
109use swap::SwapStatus;
110use sync::Mutex;
111#[cfg(any(target_os = "android", target_os = "linux"))]
112pub use sys::FsMappingRequest;
113#[cfg(windows)]
114pub use sys::InitialAudioSessionState;
115#[cfg(any(target_os = "android", target_os = "linux"))]
116pub use sys::VmMemoryMappingRequest;
117#[cfg(any(target_os = "android", target_os = "linux"))]
118pub use sys::VmMemoryMappingResponse;
119use thiserror::Error;
120pub use vm_control_product::GpuSendToMain;
121pub use vm_control_product::GpuSendToService;
122pub use vm_control_product::ServiceSendToGpu;
123use vm_memory::GuestAddress;
124
125#[cfg(feature = "balloon")]
126pub use crate::balloon_tube::BalloonControlCommand;
127#[cfg(feature = "balloon")]
128pub use crate::balloon_tube::BalloonTube;
129#[cfg(feature = "gdb")]
130pub use crate::gdb::VcpuDebug;
131#[cfg(feature = "gdb")]
132pub use crate::gdb::VcpuDebugStatus;
133#[cfg(feature = "gdb")]
134pub use crate::gdb::VcpuDebugStatusMessage;
135#[cfg(feature = "gpu")]
136use crate::gpu::GpuControlCommand;
137#[cfg(feature = "gpu")]
138use crate::gpu::GpuControlResult;
139
/// Control the state of a particular VM CPU.
///
/// Variants that expect an answer carry the `mpsc::Sender` on which the answer is delivered.
#[derive(Clone, Debug)]
pub enum VcpuControl {
    /// Carries a `VcpuDebug` message; only present when the `gdb` feature is enabled.
    #[cfg(feature = "gdb")]
    Debug(VcpuDebug),
    /// Carries a `VmRunMode` for the vCPU.
    // NOTE(review): presumably the mode the vCPU should switch to -- confirm in the vCPU loop.
    RunState(VmRunMode),
    // NOTE(review): presumably asks the vCPU thread to switch to real-time scheduling; the
    // handler is not part of this file, so confirm before relying on it.
    MakeRT,
    // Request the current state of the vCPU. The result is sent back over the included channel.
    GetStates(mpsc::Sender<VmRunMode>),
    // Request the vcpu write a snapshot of itself to the writer, then send a `Result` back over
    // the channel after completion/failure.
    Snapshot(SnapshotWriter, mpsc::Sender<anyhow::Result<()>>),
    /// Restore the vCPU as described by the `VcpuRestoreRequest`, which carries its own result
    /// channel.
    Restore(VcpuRestoreRequest),
    // NOTE(review): Linux/Android only; the meaning and unit of the `u32` are not visible here.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    Throttle(u32),
}
156
/// Request to restore a Vcpu from a given snapshot, and report the results
/// back via the provided channel.
#[derive(Clone, Debug)]
pub struct VcpuRestoreRequest {
    /// Channel on which the outcome of the restore is reported.
    pub result_sender: mpsc::Sender<anyhow::Result<()>>,
    /// Reader for the snapshot the vCPU is restored from.
    pub snapshot_reader: SnapshotReader,
    // NOTE(review): x86_64 only; presumably a host TSC reading used as the reference point for
    // the restore -- confirm against the code that fills this field in.
    #[cfg(target_arch = "x86_64")]
    pub host_tsc_reference_moment: u64,
}
166
/// Mode of execution for the VM.
///
/// The `Display` implementation renders each mode as its lower-case name.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum VmRunMode {
    /// The default run mode indicating the VCPUs are running.
    #[default]
    Running,
    /// Indicates that the VCPUs are suspending execution until the `Running` mode is set.
    Suspending,
    /// Indicates that the VM is exiting all processes.
    Exiting,
    /// Indicates that the VM is stopped at a breakpoint, waiting for the debugger to continue.
    Breakpoint,
}
180
181impl Display for VmRunMode {
182    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
183        use self::VmRunMode::*;
184
185        match self {
186            Running => write!(f, "running"),
187            Suspending => write!(f, "suspending"),
188            Exiting => write!(f, "exiting"),
189            Breakpoint => write!(f, "breakpoint"),
190        }
191    }
192}
193
/// Trait for devices that get notification on specific GPE trigger.
pub trait GpeNotify: Send {
    /// Notification hook; the default implementation does nothing.
    fn notify(&mut self) {}
}
198
/// Trait for devices that get notification on specific PCI PME.
pub trait PmeNotify: Send {
    /// Notification hook receiving the requester id; the default implementation does nothing.
    fn notify(&mut self, _requester_id: u16) {}
}
203
/// Interface for delivering power-management events and registering notification devices.
///
/// Every method has an empty default body, so an implementor only overrides what it handles.
// NOTE(review): the event semantics are read off the method names (power button, sleep button,
// RTC, GPE, PME); no implementor is visible in this file.
pub trait PmResource {
    fn pwrbtn_evt(&mut self) {}
    fn slpbtn_evt(&mut self) {}
    fn rtc_evt(&mut self, _clear_evt: Event) {}
    fn gpe_evt(&mut self, _gpe: u32, _clear_evt: Option<Event>) {}
    fn pme_evt(&mut self, _requester_id: u16) {}
    /// Registers a `GpeNotify` device for the GPE number `_gpe`.
    fn register_gpe_notify_dev(&mut self, _gpe: u32, _notify_dev: Arc<Mutex<dyn GpeNotify>>) {}
    /// Registers a `PmeNotify` device for the bus number `_bus`.
    fn register_pme_notify_dev(&mut self, _bus: u8, _notify_dev: Arc<Mutex<dyn PmeNotify>>) {}
}
213
/// The maximum number of devices that can be listed in one `UsbControlCommand`.
///
/// This is the array length of both `UsbControlCommand::ListDevice::ports` and
/// `UsbControlResult::Devices`.
///
/// This value was set to be equal to `xhci_regs::MAX_PORTS` for convenience, but it is not
/// necessary for correctness. Importing that value directly would be overkill because it would
/// require adding a big dependency for a single const.
pub const USB_CONTROL_MAX_PORTS: usize = 16;
220
/// Commands that can be sent to a disk device.
///
/// The `Display` implementation renders `Resize` as `disk_resize <new_size>`.
#[derive(Serialize, Deserialize, Debug)]
pub enum DiskControlCommand {
    /// Resize a disk to `new_size` in bytes.
    Resize { new_size: u64 },
}
226
227impl Display for DiskControlCommand {
228    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
229        use self::DiskControlCommand::*;
230
231        match self {
232            Resize { new_size } => write!(f, "disk_resize {new_size}"),
233        }
234    }
235}
236
/// Result of a disk control operation (presumably the reply to a `DiskControlCommand`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum DiskControlResult {
    /// The operation succeeded.
    Ok,
    /// The operation failed with the given system error.
    Err(SysError),
}
242
/// Net control commands for adding and removing tap devices.
///
/// Only available with the `pci-hotplug` feature.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug)]
pub enum NetControlCommand {
    // NOTE(review): the `String` is presumably the name of the tap device to add -- confirm.
    AddTap(String),
    // NOTE(review): the `u8` is presumably the PCI bus reported by `PciControlResult::AddOk` --
    // confirm against the handler.
    RemoveTap(u8),
}
250
/// Commands for the USB controller.
#[derive(Serialize, Deserialize, Debug)]
pub enum UsbControlCommand {
    /// Attach the device backed by `file`.
    AttachDevice {
        /// Serialized as a descriptor through `with_as_descriptor`.
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Attach the security key backed by `file`.
    AttachSecurityKey {
        /// Serialized as a descriptor through `with_as_descriptor`.
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Detach the device on `port`.
    DetachDevice {
        port: u8,
    },
    /// List devices for up to `USB_CONTROL_MAX_PORTS` ports.
    ListDevice {
        // NOTE(review): presumably the port numbers to query; how unused entries are encoded is
        // not visible here.
        ports: [u8; USB_CONTROL_MAX_PORTS],
    },
}
268
/// One entry of a `UsbControlResult::Devices` listing.
///
/// The `Default` value has `port == 0`, which `valid()` reports as not valid.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)]
pub struct UsbControlAttachedDevice {
    /// Port number; `0` marks an entry that is not valid.
    pub port: u8,
    /// Vendor id, printed as four hex digits by `UsbControlResult`'s `Display`.
    pub vendor_id: u16,
    /// Product id, printed as four hex digits by `UsbControlResult`'s `Display`.
    pub product_id: u16,
}
275
276impl UsbControlAttachedDevice {
277    pub fn valid(self) -> bool {
278        self.port != 0
279    }
280}
281
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
/// Result for hotplug and removal of PCI device.
///
/// Only available with the `pci-hotplug` feature. `#[must_use]` makes ignoring a result a
/// compiler warning.
pub enum PciControlResult {
    /// The device was added; `bus` is the bus number reported for it.
    AddOk { bus: u8 },
    /// The operation failed; the string is the error description.
    ErrString(String),
    /// The device was removed.
    RemoveOk,
}
291
#[cfg(feature = "pci-hotplug")]
impl Display for PciControlResult {
    /// Writes `add_ok <bus>`, `error: <message>` or `remove_ok`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::AddOk { bus } => write!(f, "add_ok {bus}"),
            Self::ErrString(e) => write!(f, "error: {e}"),
            Self::RemoveOk => f.write_str("remove_ok"),
        }
    }
}
304
/// Result of a `UsbControlCommand`.
///
/// The `Display` implementation renders each variant as a snake_case keyword, followed by the
/// port for `Ok` and by the valid entries for `Devices`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum UsbControlResult {
    /// Success, with the port involved.
    Ok { port: u8 },
    NoAvailablePort,
    NoSuchDevice,
    NoSuchPort,
    FailedToOpenDevice,
    /// Device listing; entries with `port == 0` are not valid and are skipped when displayed.
    Devices([UsbControlAttachedDevice; USB_CONTROL_MAX_PORTS]),
    FailedToInitHostDevice,
}
315
316impl Display for UsbControlResult {
317    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
318        use self::UsbControlResult::*;
319
320        match self {
321            UsbControlResult::Ok { port } => write!(f, "ok {port}"),
322            NoAvailablePort => write!(f, "no_available_port"),
323            NoSuchDevice => write!(f, "no_such_device"),
324            NoSuchPort => write!(f, "no_such_port"),
325            FailedToOpenDevice => write!(f, "failed_to_open_device"),
326            Devices(devices) => {
327                write!(f, "devices")?;
328                for d in devices.iter().filter(|d| d.valid()) {
329                    write!(f, " {} {:04x} {:04x}", d.port, d.vendor_id, d.product_id)?;
330                }
331                std::result::Result::Ok(())
332            }
333            FailedToInitHostDevice => write!(f, "failed_to_init_host_device"),
334        }
335    }
336}
337
/// Commands for snapshot feature
#[derive(Serialize, Deserialize, Debug)]
pub enum SnapshotCommand {
    /// Take a snapshot.
    Take {
        /// Path the snapshot is associated with (presumably where it is written -- confirm).
        snapshot_path: PathBuf,
        // NOTE(review): presumably selects compression of the guest memory part of the snapshot.
        compress_memory: bool,
        // NOTE(review): presumably selects encryption of the snapshot; scheme not visible here.
        encrypt: bool,
    },
}
347
/// Commands for actions on devices and the devices control thread.
// NOTE(review): the handlers are not in this file; the variant names are the only description of
// the behaviour available here.
#[derive(Serialize, Deserialize, Debug)]
pub enum DeviceControlCommand {
    SleepDevices,
    WakeDevices,
    /// Snapshot the devices using the given writer.
    SnapshotDevices { snapshot_writer: SnapshotWriter },
    /// Restore the devices using the given reader.
    RestoreDevices { snapshot_reader: SnapshotReader },
    GetDevicesState,
    Exit,
}
358
/// Commands to control the IRQ handler thread.
#[derive(Serialize, Deserialize)]
pub enum IrqHandlerRequest {
    /// No response is sent for this command.
    AddIrqControlTubes(Vec<Tube>),
    /// Refreshes the set of event tokens (Events) from the Irqchip that the IRQ
    /// handler waits on to forward IRQs to their final destination (e.g. via
    /// Irqchip::service_irq_event).
    ///
    /// If the set of tokens exposed by the Irqchip changes while the VM is
    /// running (such as for snapshot restore), this command must be sent
    /// otherwise the VM will not receive IRQs as expected.
    RefreshIrqEventTokens,
    // NOTE(review): presumably answered with `IrqHandlerResponse::HandlerIterationComplete`,
    // whose documentation refers to this request -- confirm in the IRQ handler loop.
    WakeAndNotifyIteration,
    /// No response is sent for this command.
    Exit,
}
376
// NOTE(review): not used in the visible part of this file; presumably the number of
// `WakeAndNotifyIteration` round trips an IRQ flush is expected to stay under -- confirm at the
// use site.
const EXPECTED_MAX_IRQ_FLUSH_ITERATIONS: usize = 100;
378
/// Response for [IrqHandlerRequest].
///
/// Requests documented as sending no response (`AddIrqControlTubes`, `Exit`) have no variant
/// here.
#[derive(Serialize, Deserialize, Debug)]
pub enum IrqHandlerResponse {
    /// Sent when the IRQ event tokens have been refreshed.
    IrqEventTokenRefreshComplete,
    /// Specifies the number of tokens serviced in the requested iteration
    /// (less the token for the `WakeAndNotifyIteration` request).
    HandlerIterationComplete(usize),
}
388
/// Source of a `VmMemoryRequest::RegisterMemory` mapping.
#[derive(Serialize, Deserialize)]
pub enum VmMemorySource {
    /// Register shared memory represented by the given descriptor.
    /// On Windows, descriptor MUST be a mapping handle.
    SharedMemory(SharedMemory),
    /// Register a file mapping from the given descriptor.
    Descriptor {
        /// File descriptor to map.
        descriptor: SafeDescriptor,
        /// Offset within the file in bytes.
        offset: u64,
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register memory mapped by Vulkano.
    ///
    /// `map` imports this through `RutabagaGralloc::import_and_map`.
    Vulkan {
        /// OS handle of the memory; ownership moves into the `RutabagaHandle`.
        descriptor: SafeDescriptor,
        /// Passed through as `RutabagaHandle::handle_type`.
        handle_type: u32,
        /// Passed through as `VulkanInfo::memory_idx`.
        memory_idx: u32,
        /// Passed through as `rutabaga_gfx::DeviceId::device_uuid`.
        device_uuid: [u8; 16],
        /// Passed through as `rutabaga_gfx::DeviceId::driver_uuid`.
        driver_uuid: [u8; 16],
        /// Size handed to `import_and_map` and reported as the mapping size.
        size: u64,
    },
    /// Register the current rutabaga external mapping.
    ///
    /// `map` wraps `ptr` and `size` in a `base::ExternalMapping` without further checks.
    ExternalMapping { ptr: u64, size: u64 },
}
416
417// The following are wrappers to avoid base dependencies in the rutabaga crate
418fn to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor {
419    // SAFETY:
420    // Safe because we own the SafeDescriptor at this point.
421    unsafe { RutabagaDescriptor::from_raw_descriptor(s.into_raw_descriptor()) }
422}
423
/// Adapter that lets a rutabaga mapping be used where a `base::MappedRegion` is expected.
struct RutabagaMemoryRegion {
    /// The wrapped rutabaga mapping; `as_ptr` and `size` are forwarded to it.
    region: Box<dyn RutabagaMappedRegion>,
}
427
428impl RutabagaMemoryRegion {
429    pub fn new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion {
430        RutabagaMemoryRegion { region }
431    }
432}
433
// SAFETY:
//
// Self guarantees `ptr`..`ptr+size` is an mmaped region owned by this object that
// can't be unmapped during the `MappedRegion`'s lifetime.
unsafe impl MappedRegion for RutabagaMemoryRegion {
    /// Forwards to the wrapped rutabaga region.
    fn as_ptr(&self) -> *mut u8 {
        self.region.as_ptr()
    }

    /// Forwards to the wrapped rutabaga region.
    fn size(&self) -> usize {
        self.region.size()
    }
}
447
448impl Display for VmMemorySource {
449    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
450        use self::VmMemorySource::*;
451
452        match self {
453            SharedMemory(..) => write!(f, "VmMemorySource::SharedMemory"),
454            Descriptor { .. } => write!(f, "VmMemorySource::Descriptor"),
455            Vulkan { .. } => write!(f, "VmMemorySource::Vulkan"),
456            ExternalMapping { .. } => write!(f, "VmMemorySource::ExternalMapping"),
457        }
458    }
459}
460
461impl VmMemorySource {
462    /// Map the resource and return its mapping and size in bytes.
463    fn map(
464        self,
465        gralloc: &mut RutabagaGralloc,
466        prot: Protection,
467    ) -> anyhow::Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
468        let (mem_region, size, descriptor) = match self {
469            VmMemorySource::Descriptor {
470                descriptor,
471                offset,
472                size,
473            } => (
474                map_descriptor(&descriptor, offset, size, prot)?,
475                size,
476                Some(descriptor),
477            ),
478
479            VmMemorySource::SharedMemory(shm) => {
480                (map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
481            }
482            VmMemorySource::Vulkan {
483                descriptor,
484                handle_type,
485                memory_idx,
486                device_uuid,
487                driver_uuid,
488                size,
489            } => {
490                let device_id = rutabaga_gfx::DeviceId {
491                    device_uuid,
492                    driver_uuid,
493                };
494                let mapped_region = gralloc
495                    .import_and_map(
496                        RutabagaHandle {
497                            os_handle: to_rutabaga_desciptor(descriptor),
498                            handle_type,
499                        },
500                        VulkanInfo {
501                            memory_idx,
502                            device_id,
503                        },
504                        size,
505                    )
506                    .with_context(|| {
507                        format!(
508                            "gralloc failed to import and map, handle type: {handle_type}, memory index {memory_idx}, \
509                             size: {size}"
510                        )
511                    })?;
512                let mapped_region: Box<dyn MappedRegion> =
513                    Box::new(RutabagaMemoryRegion::new(mapped_region));
514                (mapped_region, size, None)
515            }
516            VmMemorySource::ExternalMapping { ptr, size } => {
517                let mapped_region: Box<dyn MappedRegion> = Box::new(ExternalMapping {
518                    ptr,
519                    size: size as usize,
520                });
521                (mapped_region, size, None)
522            }
523        };
524        Ok((mem_region, size, descriptor))
525    }
526}
527
/// Destination of a `VmMemoryRequest::RegisterMemory` mapping in guest address space.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryDestination {
    /// Map at an offset within an existing PCI BAR allocation.
    ///
    /// `allocation` identifies the allocation and `offset` is the position inside it; `allocate`
    /// resolves the pair to a guest address through the MMIO allocator.
    ExistingAllocation { allocation: Alloc, offset: u64 },
    /// Map at the specified guest physical address.
    ///
    /// `allocate` returns this address unchanged.
    GuestPhysicalAddress(u64),
}
536
537impl VmMemoryDestination {
538    /// Allocate and return the guest address of a memory mapping destination.
539    pub fn allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress> {
540        let addr = match self {
541            VmMemoryDestination::ExistingAllocation { allocation, offset } => allocator
542                .mmio_allocator_any()
543                .address_from_pci_offset(allocation, offset, size)
544                .map_err(|_e| SysError::new(EINVAL))?,
545            VmMemoryDestination::GuestPhysicalAddress(gpa) => gpa,
546        };
547        Ok(GuestAddress(addr))
548    }
549}
550
/// Request to register or unregister an ioevent.
#[derive(Serialize, Deserialize)]
pub struct IoEventUpdateRequest {
    /// The event of the ioevent.
    pub event: Event,
    /// Raw guest address of the ioevent (see `VmMemoryRequest::IoEventRaw`).
    pub addr: u64,
    /// Data match condition for the ioevent.
    pub datamatch: Datamatch,
    // NOTE(review): presumably `true` registers and `false` unregisters -- confirm in the
    // `IoEventRaw` handler, which is outside the visible part of this file.
    pub register: bool,
}
559
/// Request to mmap a file to a shared memory.
/// This request is supposed to follow a `VmMemoryRequest::MmapAndRegisterMemory` request that
/// contains `SharedMemory` that `file` is mmaped to.
///
/// The `MmapAndRegisterMemory` handler passes the fields to
/// `MemoryMappingArena::add_fd_mapping` with read-only protection.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[derive(Serialize, Deserialize)]
pub struct VmMemoryFileMapping {
    /// File to map; serialized as a descriptor through `with_as_descriptor`.
    #[serde(with = "with_as_descriptor")]
    pub file: File,
    /// Length of the mapping.
    pub length: usize,
    /// Offset of the mapping inside the arena built from the shared memory.
    pub mem_offset: usize,
    /// Offset inside `file` where the mapping starts.
    pub file_offset: u64,
}
572
/// Memory-related requests, executed by `VmMemoryRequest::execute`, which answers each one with
/// a `VmMemoryResponse`.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryRequest {
    /// Prepare a shared memory region to make later operations more efficient. This
    /// may be a no-op depending on underlying platform support.
    PrepareSharedMemoryRegion { alloc: Alloc, cache: MemCacheType },
    /// Register a memory to be mapped to the guest.
    RegisterMemory {
        /// Source of the memory to register (mapped file descriptor, shared memory region, etc.)
        source: VmMemorySource,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Protection to map the memory with. `execute` passes `prot == Protection::read()` to
        /// `Vm::add_memory_region` when the region is not mapped into a prepared region.
        prot: Protection,
        /// Cache attribute for guest memory setting
        cache: MemCacheType,
    },
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Call mmap to `shm` and register the memory region as a read-only guest memory.
    /// This request is followed by an array of `VmMemoryFileMapping` with length
    /// `num_file_mappings`
    MmapAndRegisterMemory {
        /// Shared memory region to mmap and register.
        shm: SharedMemory,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Length of the array of `VmMemoryFileMapping` that follows.
        num_file_mappings: usize,
    },
    /// Call hypervisor to free the given memory range.
    // NOTE(review): each tuple is presumably (start address, length in bytes) -- confirm.
    DynamicallyFreeMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Call hypervisor to reclaim a priorly freed memory range.
    // NOTE(review): each tuple is presumably (start address, length in bytes) -- confirm.
    DynamicallyReclaimMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Balloon allocation/deallocation target reached.
    BalloonTargetReached { size: u64 },
    /// Unregister the given memory slot that was previously registered with `RegisterMemory`.
    UnregisterMemory(VmMemoryRegionId),
    /// Register an eventfd with raw guest memory address.
    IoEventRaw(IoEventUpdateRequest),
}
612
/// Struct for managing `VmMemoryRequest`s IOMMU related state.
pub struct VmMemoryRequestIommuClient {
    /// Tube used to send `VirtioIOMMURequest`s to the viommu.
    tube: Arc<Mutex<Tube>>,
    /// Regions for which a `VfioDmabufMap` request was acknowledged during `RegisterMemory`.
    registered_memory: BTreeSet<VmMemoryRegionId>,
}
618
619impl VmMemoryRequestIommuClient {
620    /// Constructs `VmMemoryRequestIommuClient` from a tube for communication with the viommu.
621    pub fn new(tube: Arc<Mutex<Tube>>) -> Self {
622        Self {
623            tube,
624            registered_memory: BTreeSet::new(),
625        }
626    }
627}
628
/// Bookkeeping entry for one registered memory region, stored in
/// `VmMemoryRegionState::registered_memory`.
enum RegisteredMemory {
    /// Region mapped into a prepared memory slot by `try_map_to_prepared_region`.
    FixedMapping {
        /// Slot of the prepared region.
        slot: MemSlot,
        /// Offset of the mapping inside the prepared region.
        offset: usize,
        /// Size of the mapping.
        size: usize,
    },
    /// Region that was given its own slot by `Vm::add_memory_region` in the `RegisterMemory`
    /// handler.
    DynamicMapping {
        /// Slot returned by `Vm::add_memory_region`.
        slot: MemSlot,
    },
}
639
/// A prepared shared memory region, as stored in `VmMemoryRegionState::mapped_regions` by the
/// `PrepareSharedMemoryRegion` handler.
pub struct VmMappedMemoryRegion {
    /// Guest address of the start of the region; mapping offsets are added to it.
    guest_address: GuestAddress,
    /// Memory slot of the region, used for `Vm::add_fd_mapping`.
    slot: MemSlot,
}
644
/// State shared between `VmMemoryRequest`s: the prepared regions and the registered memory.
#[derive(Default)]
pub struct VmMemoryRegionState {
    /// Prepared regions, keyed by the allocation they were prepared for.
    mapped_regions: HashMap<Alloc, VmMappedMemoryRegion>,
    /// Every registered region, keyed by its region id.
    registered_memory: BTreeMap<VmMemoryRegionId, RegisteredMemory>,
}
650
651fn try_map_to_prepared_region(
652    vm: &mut impl Vm,
653    region_state: &mut VmMemoryRegionState,
654    source: &VmMemorySource,
655    dest: &VmMemoryDestination,
656    prot: &Protection,
657) -> Option<VmMemoryResponse> {
658    let VmMemoryDestination::ExistingAllocation {
659        allocation,
660        offset: dest_offset,
661    } = dest
662    else {
663        return None;
664    };
665
666    let VmMappedMemoryRegion {
667        guest_address,
668        slot,
669    } = region_state.mapped_regions.get(allocation)?;
670
671    let (descriptor, file_offset, size) = match source {
672        VmMemorySource::Descriptor {
673            descriptor,
674            offset,
675            size,
676        } => (
677            Descriptor(descriptor.as_raw_descriptor()),
678            *offset,
679            *size as usize,
680        ),
681        VmMemorySource::SharedMemory(shm) => {
682            let size = shm.size() as usize;
683            (Descriptor(shm.as_raw_descriptor()), 0, size)
684        }
685        _ => {
686            let error = anyhow::anyhow!(
687                "source {} is not compatible with fixed mapping into prepared memory region",
688                source
689            );
690            return Some(VmMemoryResponse::Err(error.into()));
691        }
692    };
693    if let Err(err) = vm
694        .add_fd_mapping(
695            *slot,
696            *dest_offset as usize,
697            size,
698            &descriptor,
699            file_offset,
700            *prot,
701        )
702        .context("failed to add fd mapping when trying to map to prepared region")
703    {
704        return Some(VmMemoryResponse::Err(err.into()));
705    }
706
707    let guest_address = GuestAddress(guest_address.0 + dest_offset);
708    let region_id = VmMemoryRegionId(guest_address);
709    region_state.registered_memory.insert(
710        region_id,
711        RegisteredMemory::FixedMapping {
712            slot: *slot,
713            offset: *dest_offset as usize,
714            size,
715        },
716    );
717
718    Some(VmMemoryResponse::RegisterMemory {
719        region_id,
720        slot: *slot,
721    })
722}
723
724impl VmMemoryRequest {
725    /// Executes this request on the given Vm.
726    ///
727    /// # Arguments
728    /// * `vm` - The `Vm` to perform the request on.
729    /// * `allocator` - Used to allocate addresses.
730    ///
731    /// This does not return a result, instead encapsulating the success or failure in a
732    /// `VmMemoryResponse` with the intended purpose of sending the response back over the socket
    /// that received this `VmMemoryRequest`.
734    pub fn execute(
735        self,
736        #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube,
737        vm: &mut impl Vm,
738        sys_allocator: &mut SystemAllocator,
739        gralloc: &mut RutabagaGralloc,
740        iommu_client: Option<&mut VmMemoryRequestIommuClient>,
741        region_state: &mut VmMemoryRegionState,
742    ) -> VmMemoryResponse {
743        use self::VmMemoryRequest::*;
744        match self {
745            PrepareSharedMemoryRegion { alloc, cache } => {
746                // Currently the iommu_client is only used by virtio-gpu when used alongside GPU
747                // pci-passthrough.
748                //
749                // TODO(b/323368701): Make compatible with iommu_client by ensuring that
750                // VirtioIOMMUVfioCommand::VfioDmabufMap is submitted for both dynamic mappings and
751                // fixed mappings (i.e. whether or not try_map_to_prepared_region succeeds in
752                // RegisterMemory case below).
753                assert!(iommu_client.is_none());
754
755                if !sys::should_prepare_memory_region() {
756                    return VmMemoryResponse::Ok;
757                }
758
759                match sys::prepare_shared_memory_region(vm, sys_allocator, alloc, cache)
760                    .context("failed to prepare shared memory region")
761                {
762                    Ok(region) => {
763                        region_state.mapped_regions.insert(alloc, region);
764                        VmMemoryResponse::Ok
765                    }
766                    Err(e) => VmMemoryResponse::Err(e.into()),
767                }
768            }
769            RegisterMemory {
770                source,
771                dest,
772                prot,
773                cache,
774            } => {
775                if let Some(resp) =
776                    try_map_to_prepared_region(vm, region_state, &source, &dest, &prot)
777                {
778                    return resp;
779                }
780
781                // Correct on Windows because callers of this IPC guarantee descriptor is a mapping
782                // handle.
783                let (mapped_region, size, descriptor) =
784                    match source.map(gralloc, prot).context("gralloc mapping") {
785                        Ok((region, size, descriptor)) => (region, size, descriptor),
786                        Err(e) => return VmMemoryResponse::Err(e.into()),
787                    };
788
789                let guest_addr = match dest
790                    .allocate(sys_allocator, size)
791                    .context("VM memory destination allocation fails")
792                {
793                    Ok(addr) => addr,
794                    Err(e) => return VmMemoryResponse::Err(e.into()),
795                };
796
797                let slot = match vm
798                    .add_memory_region(
799                        guest_addr,
800                        mapped_region,
801                        prot == Protection::read(),
802                        false,
803                        cache,
804                    )
805                    .context("failed to add memory region when registering memory")
806                {
807                    Ok(slot) => slot,
808                    Err(e) => return VmMemoryResponse::Err(e.into()),
809                };
810
811                let region_id = VmMemoryRegionId(guest_addr);
812                if let (Some(descriptor), Some(iommu_client)) = (descriptor, iommu_client) {
813                    let request =
814                        VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
815                            region_id,
816                            gpa: guest_addr.0,
817                            size,
818                            dma_buf: descriptor,
819                        });
820
821                    match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
822                        Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
823                        resp => {
824                            let error = anyhow::anyhow!(
825                                "Unexpected virtio-iommu message response when registering memory: \
826                                 {:?}", resp);
827                            if let Err(e) = vm.remove_memory_region(slot) {
828                                // There is nothing we can do here, so we just log a warning
829                                // message.
830                                warn!("failed to remove memory region: {:?}", e);
831                            }
832                            return VmMemoryResponse::Err(error.into());
833                        }
834                    };
835
836                    iommu_client.registered_memory.insert(region_id);
837                }
838
839                region_state
840                    .registered_memory
841                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
842                VmMemoryResponse::RegisterMemory { region_id, slot }
843            }
844            #[cfg(any(target_os = "android", target_os = "linux"))]
845            MmapAndRegisterMemory {
846                shm,
847                dest,
848                num_file_mappings,
849            } => {
850                // Define a callback to be executed with extended limit of file counts.
                // It receives `num_file_mappings` FDs and calls `add_fd_mapping` for each.
852                let callback = || {
853                    let mem = match MemoryMappingBuilder::new(shm.size() as usize)
854                        .from_shared_memory(&shm)
855                        .build()
856                        .context("failed to build MemoryMapping from shared memory")
857                    {
858                        Ok(mem) => mem,
859                        Err(e) => return Err(VmMemoryResponse::Err(e.into())),
860                    };
861                    let mut mmap_arena = MemoryMappingArena::from(mem);
862
863                    // If `num_file_mappings` exceeds `SCM_MAX_FD`, `file_mappings` are sent in
864                    // chunks of length `SCM_MAX_FD`.
865                    let mut file_mappings = Vec::with_capacity(num_file_mappings);
866                    let mut read = 0;
867                    while read < num_file_mappings {
868                        let len = std::cmp::min(num_file_mappings - read, base::unix::SCM_MAX_FD);
869                        let mps: Vec<VmMemoryFileMapping> = match tube
870                            .recv_with_max_fds(len)
871                            .with_context(|| format!("get {num_file_mappings} FDs to be mapped"))
872                        {
873                            Ok(m) => m,
874                            Err(e) => return Err(VmMemoryResponse::Err(e.into())),
875                        };
876                        file_mappings.extend(mps.into_iter());
877                        read += len;
878                    }
879
880                    for VmMemoryFileMapping {
881                        mem_offset,
882                        length,
883                        file,
884                        file_offset,
885                    } in file_mappings
886                    {
887                        if let Err(e) = mmap_arena
888                            .add_fd_mapping(
889                                mem_offset,
890                                length,
891                                &file,
892                                file_offset,
893                                Protection::read(),
894                            )
895                            .context(
896                                "failed to add fd mapping when handling mmap and register memory",
897                            )
898                        {
899                            return Err(VmMemoryResponse::Err(e.into()));
900                        }
901                    }
902                    Ok(mmap_arena)
903                };
904                let mmap_arena = match call_with_extended_max_files(callback)
905                    .context("failed to set max count of file descriptors")
906                {
907                    Ok(Ok(m)) => m,
908                    Ok(Err(e)) => {
909                        return e;
910                    }
911                    Err(e) => {
912                        error!("{e:?}");
913                        return VmMemoryResponse::Err(e.into());
914                    }
915                };
916
917                let size = shm.size();
918                let guest_addr = match dest.allocate(sys_allocator, size).context(
919                    "VM memory destination allocation fails when handling mmap and register memory",
920                ) {
921                    Ok(addr) => addr,
922                    Err(e) => return VmMemoryResponse::Err(e.into()),
923                };
924
925                let slot = match vm
926                    .add_memory_region(
927                        guest_addr,
928                        Box::new(mmap_arena),
929                        true,
930                        false,
931                        MemCacheType::CacheCoherent,
932                    )
933                    .context("failed to add memory region when handling mmap and register memory")
934                {
935                    Ok(slot) => slot,
936                    Err(e) => return VmMemoryResponse::Err(e.into()),
937                };
938
939                let region_id = VmMemoryRegionId(guest_addr);
940
941                region_state
942                    .registered_memory
943                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
944
945                VmMemoryResponse::RegisterMemory { region_id, slot }
946            }
947            UnregisterMemory(id) => match region_state.registered_memory.remove(&id) {
948                Some(RegisteredMemory::DynamicMapping { slot }) => match vm
949                    .remove_memory_region(slot)
950                    .context(
951                        "failed to remove memory region when unregistering dynamic mapping memory",
952                    ) {
953                    Ok(_) => {
954                        if let Some(iommu_client) = iommu_client {
955                            if iommu_client.registered_memory.remove(&id) {
956                                let request = VirtioIOMMURequest::VfioCommand(
957                                    VirtioIOMMUVfioCommand::VfioDmabufUnmap(id),
958                                );
959
960                                match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
961                                    Ok(VirtioIOMMUResponse::VfioResponse(
962                                        VirtioIOMMUVfioResult::Ok,
963                                    )) => VmMemoryResponse::Ok,
964                                    resp => {
965                                        let error = anyhow::anyhow!(
966                                            "Unexpected virtio-iommu message response when \
967                                             unregistering memory: {:?}",
968                                            resp
969                                        );
970                                        VmMemoryResponse::Err(error.into())
971                                    }
972                                }
973                            } else {
974                                VmMemoryResponse::Ok
975                            }
976                        } else {
977                            VmMemoryResponse::Ok
978                        }
979                    }
980                    Err(e) => VmMemoryResponse::Err(e.into()),
981                },
982                Some(RegisteredMemory::FixedMapping { slot, offset, size }) => {
983                    match vm.remove_mapping(slot, offset, size).context(
984                        "failed to remove memory mapping when unregistering fixed mapping memory",
985                    ) {
986                        Ok(()) => VmMemoryResponse::Ok,
987                        Err(e) => VmMemoryResponse::Err(e.into()),
988                    }
989                }
990                None => {
991                    let error =
992                        anyhow::anyhow!("can't find the memory region when unregistering memory");
993                    VmMemoryResponse::Err(error.into())
994                }
995            },
996            DynamicallyFreeMemoryRanges { ranges } => {
997                let mut r = VmMemoryResponse::Ok;
998                for (guest_address, size) in ranges {
999                    match vm
1000                        .handle_balloon_event(BalloonEvent::Inflate(MemRegion {
1001                            guest_address,
1002                            size,
1003                        }))
1004                        .context(
1005                            "failed to handle the inflate balloon event when freeing memory ranges \
1006                             dynamically",
1007                        ) {
1008                        Ok(_) => {}
1009                        Err(e) => {
1010                            error!("{:?}", e);
1011                            r = VmMemoryResponse::Err(e.into());
1012                            break;
1013                        }
1014                    }
1015                }
1016                r
1017            }
1018            DynamicallyReclaimMemoryRanges { ranges } => {
1019                let mut r = VmMemoryResponse::Ok;
1020                for (guest_address, size) in ranges {
1021                    match vm
1022                        .handle_balloon_event(BalloonEvent::Deflate(MemRegion {
1023                            guest_address,
1024                            size,
1025                        }))
1026                        .context(
1027                            "failed to handle the deflate balloon event when reclaiming memory \
1028                             ranges dynamically",
1029                        ) {
1030                        Ok(_) => {}
1031                        Err(e) => {
1032                            error!("{:?}", e);
1033                            r = VmMemoryResponse::Err(e.into());
1034                            break;
1035                        }
1036                    }
1037                }
1038                r
1039            }
1040            BalloonTargetReached { size } => {
1041                match vm
1042                    .handle_balloon_event(BalloonEvent::BalloonTargetReached(size))
1043                    .context("failed to handle the target reached balloon event")
1044                {
1045                    Ok(_) => VmMemoryResponse::Ok,
1046                    Err(e) => VmMemoryResponse::Err(e.into()),
1047                }
1048            }
1049            IoEventRaw(request) => {
1050                let res = if request.register {
1051                    vm.register_ioevent(
1052                        &request.event,
1053                        IoEventAddress::Mmio(request.addr),
1054                        request.datamatch,
1055                    )
1056                    .context("failed to register IO event")
1057                } else {
1058                    vm.unregister_ioevent(
1059                        &request.event,
1060                        IoEventAddress::Mmio(request.addr),
1061                        request.datamatch,
1062                    )
1063                    .context("failed to unregister IO event")
1064                };
1065                match res {
1066                    Ok(_) => VmMemoryResponse::Ok,
1067                    Err(e) => VmMemoryResponse::Err(e.into()),
1068                }
1069            }
1070        }
1071    }
1072}
1073
1074#[derive(Serialize, Deserialize, Debug, PartialOrd, PartialEq, Eq, Ord, Clone, Copy)]
1075/// Identifer for registered memory regions. Globally unique.
1076// The current implementation uses guest physical address as the unique identifier.
1077pub struct VmMemoryRegionId(pub GuestAddress);
1078
1079#[derive(Serialize, Deserialize, Debug)]
1080pub enum VmMemoryResponse {
1081    /// The request to register memory into guest address space was successful.
1082    RegisterMemory {
1083        region_id: VmMemoryRegionId,
1084        slot: u32,
1085    },
1086    Ok,
1087    Err(VmMemoryResponseError),
1088}
1089
1090impl<T> From<Result<T>> for VmMemoryResponse {
1091    fn from(r: Result<T>) -> Self {
1092        match r {
1093            Ok(_) => VmMemoryResponse::Ok,
1094            Err(e) => VmMemoryResponse::Err(anyhow::Error::new(e).into()),
1095        }
1096    }
1097}
1098
1099#[derive(Debug, thiserror::Error)]
1100#[error("Vm memory response error: {0}")]
1101pub struct VmMemoryResponseError(#[from] pub anyhow::Error);
1102
1103impl TryFrom<FlatVmMemoryResponseError> for VmMemoryResponseError {
1104    type Error = anyhow::Error;
1105    fn try_from(value: FlatVmMemoryResponseError) -> StdResult<Self, Self::Error> {
1106        let inner = value
1107            .0
1108            .into_iter()
1109            .fold(
1110                None,
1111                |error: Option<anyhow::Error>, current_context| match error {
1112                    Some(error) => Some(error.context(current_context)),
1113                    None => Some(anyhow::Error::msg(current_context)),
1114                },
1115            )
1116            .context("should carry at least one error")?;
1117        Ok(Self(inner))
1118    }
1119}
1120
1121impl Serialize for VmMemoryResponseError {
1122    fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error>
1123    where
1124        S: serde::Serializer,
1125    {
1126        let flat: FlatVmMemoryResponseError = self.into();
1127        flat.serialize(serializer)
1128    }
1129}
1130
1131impl<'de> Deserialize<'de> for VmMemoryResponseError {
1132    fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
1133    where
1134        D: serde::Deserializer<'de>,
1135    {
1136        let flat = FlatVmMemoryResponseError::deserialize(deserializer)?;
1137        flat.try_into()
1138            .map_err(|e: anyhow::Error| D::Error::custom(e.to_string()))
1139    }
1140}
1141
1142#[derive(Debug, Serialize, Deserialize)]
1143struct FlatVmMemoryResponseError(Vec<String>);
1144
1145impl From<&VmMemoryResponseError> for FlatVmMemoryResponseError {
1146    fn from(value: &VmMemoryResponseError) -> Self {
1147        let contexts = value
1148            .0
1149            .chain()
1150            .map(ToString::to_string)
1151            .rev()
1152            .collect::<Vec<_>>();
1153        Self(contexts)
1154    }
1155}
1156
1157#[derive(Serialize, Deserialize, Debug)]
1158pub enum VmIrqRequest {
1159    /// Allocate one gsi, and associate gsi to irqfd with register_irqfd()
1160    AllocateOneMsi {
1161        irqfd: Event,
1162        device_id: DeviceId,
1163        queue_id: usize,
1164        device_name: String,
1165    },
1166    /// Allocate a specific gsi to irqfd with register_irqfd(). This must only
1167    /// be used when it is known that the gsi is free. Only the snapshot
1168    /// subsystem can make this guarantee, and use of this request by any other
1169    /// caller is strongly discouraged.
1170    AllocateOneMsiAtGsi {
1171        irqfd: Event,
1172        gsi: u32,
1173        device_id: DeviceId,
1174        queue_id: usize,
1175        device_name: String,
1176    },
1177    /// Add one msi route entry into the IRQ chip.
1178    AddMsiRoute {
1179        gsi: u32,
1180        msi_address: u64,
1181        msi_data: u32,
1182        #[cfg(target_arch = "aarch64")]
1183        pci_address: resources::PciAddress,
1184    },
1185    // unregister_irqfs() and release gsi
1186    ReleaseOneIrq {
1187        gsi: u32,
1188        irqfd: Event,
1189    },
1190}
1191
1192/// Data to set up an IRQ event or IRQ route on the IRQ chip.
1193/// VmIrqRequest::execute can't take an `IrqChip` argument, because of a dependency cycle between
1194/// devices and vm_control, so it takes a Fn that processes an `IrqSetup`.
1195pub enum IrqSetup<'a> {
1196    Event(u32, &'a Event, DeviceId, usize, String),
1197    Route(IrqRoute),
1198    UnRegister(u32, &'a Event),
1199}
1200
1201impl VmIrqRequest {
1202    /// Executes this request on the given Vm.
1203    ///
1204    /// # Arguments
1205    /// * `set_up_irq` - A function that applies an `IrqSetup` to an IRQ chip.
1206    ///
1207    /// This does not return a result, instead encapsulating the success or failure in a
1208    /// `VmIrqResponse` with the intended purpose of sending the response back over the socket
1209    /// that received this `VmIrqResponse`.
1210    pub fn execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse
1211    where
1212        F: FnOnce(IrqSetup) -> Result<()>,
1213    {
1214        use self::VmIrqRequest::*;
1215        match *self {
1216            AllocateOneMsi {
1217                ref irqfd,
1218                device_id,
1219                queue_id,
1220                ref device_name,
1221            } => {
1222                if let Some(irq_num) = sys_allocator.allocate_irq() {
1223                    match set_up_irq(IrqSetup::Event(
1224                        irq_num,
1225                        irqfd,
1226                        device_id,
1227                        queue_id,
1228                        device_name.clone(),
1229                    )) {
1230                        Ok(_) => VmIrqResponse::AllocateOneMsi { gsi: irq_num },
1231                        Err(e) => VmIrqResponse::Err(e),
1232                    }
1233                } else {
1234                    VmIrqResponse::Err(SysError::new(EINVAL))
1235                }
1236            }
1237            AllocateOneMsiAtGsi {
1238                ref irqfd,
1239                gsi,
1240                device_id,
1241                queue_id,
1242                ref device_name,
1243            } => {
1244                match set_up_irq(IrqSetup::Event(
1245                    gsi,
1246                    irqfd,
1247                    device_id,
1248                    queue_id,
1249                    device_name.clone(),
1250                )) {
1251                    Ok(_) => VmIrqResponse::Ok,
1252                    Err(e) => VmIrqResponse::Err(e),
1253                }
1254            }
1255            AddMsiRoute {
1256                gsi,
1257                msi_address,
1258                msi_data,
1259                #[cfg(target_arch = "aarch64")]
1260                pci_address,
1261            } => {
1262                let route = IrqRoute {
1263                    gsi,
1264                    source: IrqSource::Msi {
1265                        address: msi_address,
1266                        data: msi_data,
1267                        #[cfg(target_arch = "aarch64")]
1268                        pci_address,
1269                    },
1270                };
1271                match set_up_irq(IrqSetup::Route(route)) {
1272                    Ok(_) => VmIrqResponse::Ok,
1273                    Err(e) => VmIrqResponse::Err(e),
1274                }
1275            }
1276            ReleaseOneIrq { gsi, ref irqfd } => {
1277                let _ = set_up_irq(IrqSetup::UnRegister(gsi, irqfd));
1278                sys_allocator.release_irq(gsi);
1279                VmIrqResponse::Ok
1280            }
1281        }
1282    }
1283}
1284
1285#[derive(Serialize, Deserialize, Debug)]
1286pub enum VmIrqResponse {
1287    AllocateOneMsi { gsi: u32 },
1288    Ok,
1289    Err(SysError),
1290}
1291
1292#[derive(Serialize, Deserialize, Debug, Clone)]
1293pub enum DevicesState {
1294    Sleep,
1295    Wake,
1296}
1297
1298#[derive(Serialize, Deserialize, Debug, Clone)]
1299pub enum BatControlResult {
1300    Ok,
1301    NoBatDevice,
1302    NoSuchHealth,
1303    NoSuchProperty,
1304    NoSuchStatus,
1305    NoSuchBatType,
1306    StringParseIntErr,
1307    StringParseBoolErr,
1308}
1309
1310impl Display for BatControlResult {
1311    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1312        use self::BatControlResult::*;
1313
1314        match self {
1315            Ok => write!(f, "Setting battery property successfully"),
1316            NoBatDevice => write!(f, "No battery device created"),
1317            NoSuchHealth => write!(f, "Invalid Battery health setting. Only support: unknown/good/overheat/dead/overvoltage/unexpectedfailure/cold/watchdogtimerexpire/safetytimerexpire/overcurrent"),
1318            NoSuchProperty => write!(f, "Battery doesn't have such property. Only support: status/health/present/capacity/aconline"),
1319            NoSuchStatus => write!(f, "Invalid Battery status setting. Only support: unknown/charging/discharging/notcharging/full"),
1320            NoSuchBatType => write!(f, "Invalid Battery type setting. Only support: goldfish"),
1321            StringParseIntErr => write!(f, "Battery property target ParseInt error"),
1322            StringParseBoolErr => write!(f, "Battery property target ParseBool error"),
1323        }
1324    }
1325}
1326
1327#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, PartialEq, Eq)]
1328#[serde(rename_all = "kebab-case")]
1329pub enum BatteryType {
1330    #[default]
1331    Goldfish,
1332}
1333
1334impl FromStr for BatteryType {
1335    type Err = BatControlResult;
1336
1337    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1338        match s {
1339            "goldfish" => Ok(BatteryType::Goldfish),
1340            _ => Err(BatControlResult::NoSuchBatType),
1341        }
1342    }
1343}
1344
1345#[derive(Serialize, Deserialize, Debug)]
1346pub enum BatProperty {
1347    Status,
1348    Health,
1349    Present,
1350    Capacity,
1351    ACOnline,
1352    SetFakeBatConfig,
1353    CancelFakeBatConfig,
1354}
1355
1356impl FromStr for BatProperty {
1357    type Err = BatControlResult;
1358
1359    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1360        match s {
1361            "status" => Ok(BatProperty::Status),
1362            "health" => Ok(BatProperty::Health),
1363            "present" => Ok(BatProperty::Present),
1364            "capacity" => Ok(BatProperty::Capacity),
1365            "aconline" => Ok(BatProperty::ACOnline),
1366            "set_fake_bat_config" => Ok(BatProperty::SetFakeBatConfig),
1367            "cancel_fake_bat_config" => Ok(BatProperty::CancelFakeBatConfig),
1368            _ => Err(BatControlResult::NoSuchProperty),
1369        }
1370    }
1371}
1372
1373impl Display for BatProperty {
1374    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1375        match *self {
1376            BatProperty::Status => write!(f, "status"),
1377            BatProperty::Health => write!(f, "health"),
1378            BatProperty::Present => write!(f, "present"),
1379            BatProperty::Capacity => write!(f, "capacity"),
1380            BatProperty::ACOnline => write!(f, "aconline"),
1381            BatProperty::SetFakeBatConfig => write!(f, "set_fake_bat_config"),
1382            BatProperty::CancelFakeBatConfig => write!(f, "cancel_fake_bat_config"),
1383        }
1384    }
1385}
1386
1387#[derive(Serialize, Deserialize, Debug)]
1388pub enum BatStatus {
1389    Unknown,
1390    Charging,
1391    DisCharging,
1392    NotCharging,
1393    Full,
1394}
1395
1396impl BatStatus {
1397    pub fn new(status: String) -> std::result::Result<Self, BatControlResult> {
1398        match status.as_str() {
1399            "unknown" => Ok(BatStatus::Unknown),
1400            "charging" => Ok(BatStatus::Charging),
1401            "discharging" => Ok(BatStatus::DisCharging),
1402            "notcharging" => Ok(BatStatus::NotCharging),
1403            "full" => Ok(BatStatus::Full),
1404            _ => Err(BatControlResult::NoSuchStatus),
1405        }
1406    }
1407}
1408
1409impl FromStr for BatStatus {
1410    type Err = BatControlResult;
1411
1412    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1413        match s {
1414            "unknown" => Ok(BatStatus::Unknown),
1415            "charging" => Ok(BatStatus::Charging),
1416            "discharging" => Ok(BatStatus::DisCharging),
1417            "notcharging" => Ok(BatStatus::NotCharging),
1418            "full" => Ok(BatStatus::Full),
1419            _ => Err(BatControlResult::NoSuchStatus),
1420        }
1421    }
1422}
1423
1424impl From<BatStatus> for u32 {
1425    fn from(status: BatStatus) -> Self {
1426        status as u32
1427    }
1428}
1429
1430#[derive(Serialize, Deserialize, Debug)]
1431pub enum BatHealth {
1432    Unknown,
1433    Good,
1434    Overheat,
1435    Dead,
1436    OverVoltage,
1437    UnexpectedFailure,
1438    Cold,
1439    WatchdogTimerExpire,
1440    SafetyTimerExpire,
1441    OverCurrent,
1442}
1443
1444impl FromStr for BatHealth {
1445    type Err = BatControlResult;
1446
1447    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1448        match s {
1449            "unknown" => Ok(BatHealth::Unknown),
1450            "good" => Ok(BatHealth::Good),
1451            "overheat" => Ok(BatHealth::Overheat),
1452            "dead" => Ok(BatHealth::Dead),
1453            "overvoltage" => Ok(BatHealth::OverVoltage),
1454            "unexpectedfailure" => Ok(BatHealth::UnexpectedFailure),
1455            "cold" => Ok(BatHealth::Cold),
1456            "watchdogtimerexpire" => Ok(BatHealth::WatchdogTimerExpire),
1457            "safetytimerexpire" => Ok(BatHealth::SafetyTimerExpire),
1458            "overcurrent" => Ok(BatHealth::OverCurrent),
1459            _ => Err(BatControlResult::NoSuchHealth),
1460        }
1461    }
1462}
1463
1464impl From<BatHealth> for u32 {
1465    fn from(status: BatHealth) -> Self {
1466        status as u32
1467    }
1468}
1469
1470#[derive(Serialize, Deserialize, Debug)]
1471pub enum BatControlCommand {
1472    SetStatus(BatStatus),
1473    SetHealth(BatHealth),
1474    SetPresent(u32),
1475    SetCapacity(u32),
1476    SetACOnline(u32),
1477    SetFakeBatConfig(u32),
1478    CancelFakeConfig,
1479}
1480
1481impl BatControlCommand {
1482    pub fn new(property: String, target: String) -> std::result::Result<Self, BatControlResult> {
1483        let cmd = property.parse::<BatProperty>()?;
1484        match cmd {
1485            BatProperty::Status => Ok(BatControlCommand::SetStatus(target.parse::<BatStatus>()?)),
1486            BatProperty::Health => Ok(BatControlCommand::SetHealth(target.parse::<BatHealth>()?)),
1487            BatProperty::Present => Ok(BatControlCommand::SetPresent(
1488                target
1489                    .parse::<u32>()
1490                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1491            )),
1492            BatProperty::Capacity => Ok(BatControlCommand::SetCapacity(
1493                target
1494                    .parse::<u32>()
1495                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1496            )),
1497            BatProperty::ACOnline => Ok(BatControlCommand::SetACOnline(
1498                target
1499                    .parse::<u32>()
1500                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1501            )),
1502            BatProperty::SetFakeBatConfig => Ok(BatControlCommand::SetFakeBatConfig(
1503                target
1504                    .parse::<u32>()
1505                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1506            )),
1507            BatProperty::CancelFakeBatConfig => Ok(BatControlCommand::CancelFakeConfig),
1508        }
1509    }
1510}
1511
1512/// Used for VM to control battery properties.
1513pub struct BatControl {
1514    pub type_: BatteryType,
1515    pub control_tube: Tube,
1516}
1517
1518/// Used for VM to control for virtio-snd
1519#[derive(Serialize, Deserialize, Debug)]
1520pub enum SndControlCommand {
1521    MuteAll(bool),
1522}
1523
1524// Used to mark hotplug pci device's device type
1525#[derive(Serialize, Deserialize, Debug, Clone)]
1526pub enum HotPlugDeviceType {
1527    UpstreamPort,
1528    DownstreamPort,
1529    EndPoint,
1530}
1531
1532// Used for VM to hotplug pci devices
1533#[derive(Serialize, Deserialize, Debug, Clone)]
1534pub struct HotPlugDeviceInfo {
1535    pub device_type: HotPlugDeviceType,
1536    pub path: PathBuf,
1537    pub hp_interrupt: bool,
1538}
1539
1540/// Message for communicating a suspend or resume to the virtio-pvclock device.
1541#[derive(Serialize, Deserialize, Debug, Clone)]
1542pub enum PvClockCommand {
1543    Suspend,
1544    Resume,
1545}
1546
1547/// Message used by virtio-pvclock to communicate command results.
1548#[derive(Serialize, Deserialize, Debug)]
1549pub enum PvClockCommandResponse {
1550    Ok,
1551    Resumed { total_suspended_ticks: u64 },
1552    DeviceInactive,
1553    Err(SysError),
1554}
1555
1556/// Commands for vmm-swap feature
1557#[derive(Serialize, Deserialize, Debug)]
1558pub enum SwapCommand {
1559    Enable,
1560    Trim,
1561    SwapOut,
1562    Disable { slow_file_cleanup: bool },
1563    Status,
1564}
1565
1566///
1567/// A request to the main process to perform some operation on the VM.
1568///
1569/// Unless otherwise noted, each request should expect a `VmResponse::Ok` to be received on success.
1570#[derive(Serialize, Deserialize, Debug)]
1571pub enum VmRequest {
1572    /// Break the VM's run loop and exit.
1573    Exit,
1574    /// Trigger a power button event in the guest.
1575    Powerbtn,
1576    /// Trigger a sleep button event in the guest.
1577    Sleepbtn,
1578    /// Trigger a RTC interrupt in the guest. When the irq associated with the RTC is
1579    /// resampled, it will be re-asserted as long as `clear_evt` is not signaled.
1580    Rtc { clear_evt: Event },
1581    /// Suspend the VM's VCPUs until resume.
1582    SuspendVcpus,
1583    /// Swap the memory content into files on a disk
1584    Swap(SwapCommand),
1585    /// Resume the VM's VCPUs that were previously suspended.
1586    ResumeVcpus,
1587    /// Inject a general-purpose event. If `clear_evt` is provided, when the irq associated
1588    /// with the GPE is resampled, it will be re-asserted as long as `clear_evt` is not
1589    /// signaled.
1590    Gpe { gpe: u32, clear_evt: Option<Event> },
1591    /// Inject a PCI PME
1592    PciPme(u16),
1593    /// Make the VM's RT VCPU real-time.
1594    MakeRT,
1595    /// Command for balloon driver.
1596    #[cfg(feature = "balloon")]
1597    BalloonCommand(BalloonControlCommand),
1598    /// Send a command to a disk chosen by `disk_index`.
1599    /// `disk_index` is a 0-based count of `--disk`, `--rwdisk`, and `-r` command-line options.
1600    DiskCommand {
1601        disk_index: usize,
1602        command: DiskControlCommand,
1603    },
1604    /// Command to use controller.
1605    UsbCommand(UsbControlCommand),
1606    /// Command to modify the gpu.
1607    #[cfg(feature = "gpu")]
1608    GpuCommand(GpuControlCommand),
1609    /// Command to set battery.
1610    BatCommand(BatteryType, BatControlCommand),
1611    /// Command to control snd devices
1612    #[cfg(feature = "audio")]
1613    SndCommand(SndControlCommand),
1614    /// Command to add/remove multiple vfio-pci devices
1615    HotPlugVfioCommand {
1616        device: HotPlugDeviceInfo,
1617        add: bool,
1618    },
1619    /// Command to add/remove network tap device as virtio-pci device
1620    #[cfg(feature = "pci-hotplug")]
1621    HotPlugNetCommand(NetControlCommand),
1622    /// Command to Snapshot devices
1623    Snapshot(SnapshotCommand),
1624    /// Register for event notification
1625    RegisterListener {
1626        socket_addr: String,
1627        event: RegisteredEvent,
1628    },
1629    /// Unregister for notifications for event
1630    UnregisterListener {
1631        socket_addr: String,
1632        event: RegisteredEvent,
1633    },
1634    /// Unregister for all event notification
1635    Unregister { socket_addr: String },
1636    /// Suspend VM VCPUs and Devices until resume.
1637    SuspendVm,
1638    /// Resume VM VCPUs and Devices.
1639    ResumeVm,
1640    /// Returns Vcpus PID/TID
1641    VcpuPidTid,
1642    /// Throttles the requested vCPU for microseconds
1643    Throttle(usize, u32),
1644    /// Returns unique descriptor of this VM.
1645    GetVmDescriptor,
1646    /// Registers memory in guest.
1647    RegisterMemory {
1648        fd: SafeDescriptor,
1649        offset: u64,
1650        range_start: u64,
1651        range_end: u64,
1652        cache_coherent: bool,
1653    },
1654    /// Unregisters memory in guest.
1655    UnregisterMemory { region_id: u64 },
1656}
1657
1658/// NOTE: when making any changes to this enum please also update
1659/// RegisteredEventFfi in crosvm_control/src/lib.rs
1660#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
1661pub enum RegisteredEvent {
1662    VirtioBalloonWsReport,
1663    VirtioBalloonResize,
1664    VirtioBalloonOOMDeflation,
1665}
1666
1667#[derive(Serialize, Deserialize, Debug)]
1668pub enum RegisteredEventWithData {
1669    VirtioBalloonWsReport {
1670        ws_buckets: Vec<balloon_control::WSBucket>,
1671        balloon_actual: u64,
1672    },
1673    VirtioBalloonResize,
1674    VirtioBalloonOOMDeflation,
1675}
1676
1677impl RegisteredEventWithData {
1678    pub fn into_event(&self) -> RegisteredEvent {
1679        match self {
1680            Self::VirtioBalloonWsReport { .. } => RegisteredEvent::VirtioBalloonWsReport,
1681            Self::VirtioBalloonResize => RegisteredEvent::VirtioBalloonResize,
1682            Self::VirtioBalloonOOMDeflation => RegisteredEvent::VirtioBalloonOOMDeflation,
1683        }
1684    }
1685
1686    #[cfg(feature = "registered_events")]
1687    pub fn into_proto(&self) -> registered_events::RegisteredEvent {
1688        match self {
1689            Self::VirtioBalloonWsReport {
1690                ws_buckets,
1691                balloon_actual,
1692            } => {
1693                let mut report = registered_events::VirtioBalloonWsReport {
1694                    balloon_actual: *balloon_actual,
1695                    ..registered_events::VirtioBalloonWsReport::new()
1696                };
1697                for ws in ws_buckets {
1698                    report.ws_buckets.push(registered_events::VirtioWsBucket {
1699                        age: ws.age,
1700                        file_bytes: ws.bytes[0],
1701                        anon_bytes: ws.bytes[1],
1702                        ..registered_events::VirtioWsBucket::new()
1703                    });
1704                }
1705                let mut event = registered_events::RegisteredEvent::new();
1706                event.set_ws_report(report);
1707                event
1708            }
1709            Self::VirtioBalloonResize => {
1710                let mut event = registered_events::RegisteredEvent::new();
1711                event.set_resize(registered_events::VirtioBalloonResize::new());
1712                event
1713            }
1714            Self::VirtioBalloonOOMDeflation => {
1715                let mut event = registered_events::RegisteredEvent::new();
1716                event.set_oom_deflation(registered_events::VirtioBalloonOOMDeflation::new());
1717                event
1718            }
1719        }
1720    }
1721
1722    pub fn from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self {
1723        RegisteredEventWithData::VirtioBalloonWsReport {
1724            ws_buckets: ws.ws.clone(),
1725            balloon_actual,
1726        }
1727    }
1728}
1729
1730pub fn handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse {
1731    // Forward the request to the block device process via its control socket.
1732    if let Err(e) = disk_host_tube.send(command) {
1733        error!("disk socket send failed: {}", e);
1734        return VmResponse::Err(SysError::new(EINVAL));
1735    }
1736
1737    // Wait for the disk control command to be processed
1738    match disk_host_tube.recv() {
1739        Ok(DiskControlResult::Ok) => VmResponse::Ok,
1740        Ok(DiskControlResult::Err(e)) => VmResponse::Err(e),
1741        Err(e) => {
1742            error!("disk socket recv failed: {}", e);
1743            VmResponse::Err(SysError::new(EINVAL))
1744        }
1745    }
1746}
1747
1748/// WARNING: descriptor must be a mapping handle on Windows.
1749fn map_descriptor(
1750    descriptor: &dyn AsRawDescriptor,
1751    offset: u64,
1752    size: u64,
1753    prot: Protection,
1754) -> Result<Box<dyn MappedRegion>> {
1755    let size: usize = size.try_into().map_err(|_e| SysError::new(ERANGE))?;
1756    match MemoryMappingBuilder::new(size)
1757        .from_descriptor(descriptor)
1758        .offset(offset)
1759        .protection(prot)
1760        .build()
1761    {
1762        Ok(mmap) => Ok(Box::new(mmap)),
1763        Err(MmapError::SystemCallFailed(e)) => Err(e),
1764        _ => Err(SysError::new(EINVAL)),
1765    }
1766}
1767
1768// Get vCPU state. vCPUs are expected to all hold the same state.
1769// In this function, there may be a time where vCPUs are not holding the same state
1770// as they transition from one state to the other. This is expected, and the final result
1771// should be all vCPUs holding the same state.
1772fn get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode> {
1773    let (send_chan, recv_chan) = mpsc::channel();
1774    kick_vcpus(VcpuControl::GetStates(send_chan));
1775    if vcpu_num == 0 {
1776        bail!("vcpu_num is zero");
1777    }
1778    let mut current_mode_vec: Vec<VmRunMode> = Vec::new();
1779    for _ in 0..vcpu_num {
1780        match recv_chan.recv() {
1781            Ok(state) => current_mode_vec.push(state),
1782            Err(e) => {
1783                bail!("Failed to get vCPU state: {}", e);
1784            }
1785        };
1786    }
1787    let first_state = current_mode_vec[0];
1788    if first_state == VmRunMode::Exiting {
1789        panic!("Attempt to snapshot while exiting.");
1790    }
1791    if current_mode_vec.iter().any(|x| *x != first_state) {
1792        // We do not panic here. It could be that vCPUs are transitioning from one mode to another.
1793        bail!("Unknown VM state: vCPUs hold different states.");
1794    }
1795    Ok(first_state)
1796}
1797
/// A guard to guarantee that all the vCPUs are suspended during the scope.
///
/// When this guard is dropped, it rolls back the state of CPUs.
pub struct VcpuSuspendGuard<'a> {
    /// Run mode the vCPUs reported when the guard was created. It is re-applied on drop unless
    /// it was already `VmRunMode::Suspending`.
    saved_run_mode: VmRunMode,
    /// Callback used to deliver a [VcpuControl] message to the vCPUs.
    kick_vcpus: &'a dyn Fn(VcpuControl),
}
1805
1806impl<'a> VcpuSuspendGuard<'a> {
1807    /// Check the all vCPU state and suspend the vCPUs if they are running.
1808    ///
1809    /// This returns [VcpuSuspendGuard] to rollback the vcpu state.
1810    ///
1811    /// # Arguments
1812    ///
1813    /// * `kick_vcpus` - A funtion to send [VcpuControl] message to all the vCPUs and interrupt
1814    ///   them.
1815    /// * `vcpu_num` - The number of vCPUs.
1816    pub fn new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self> {
1817        // get initial vcpu state
1818        let saved_run_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1819        match saved_run_mode {
1820            VmRunMode::Running => {
1821                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1822                // Blocking call, waiting for response to ensure vCPU state was updated.
1823                // In case of failure, where a vCPU still has the state running, start up vcpus and
1824                // abort operation.
1825                let current_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1826                if current_mode != VmRunMode::Suspending {
1827                    kick_vcpus(VcpuControl::RunState(saved_run_mode));
1828                    bail!("vCPUs failed to all suspend. Kicking back all vCPUs to their previous state: {saved_run_mode}");
1829                }
1830            }
1831            VmRunMode::Suspending => {
1832                // do nothing. keep the state suspending.
1833            }
1834            other => {
1835                bail!("vcpus are not in running/suspending state, but {}", other);
1836            }
1837        };
1838        Ok(Self {
1839            saved_run_mode,
1840            kick_vcpus,
1841        })
1842    }
1843}
1844
1845impl Drop for VcpuSuspendGuard<'_> {
1846    fn drop(&mut self) {
1847        if self.saved_run_mode != VmRunMode::Suspending {
1848            (self.kick_vcpus)(VcpuControl::RunState(self.saved_run_mode));
1849        }
1850    }
1851}
1852
/// A guard to guarantee that all devices are sleeping during its scope.
///
/// When this guard is dropped, it wakes the devices.
pub struct DeviceSleepGuard<'a> {
    /// Tube over which `DeviceControlCommand`s are sent and their `VmResponse`s received.
    device_control_tube: &'a Tube,
    /// Device state reported when the guard was created. Devices are only woken on drop if this
    /// was `DevicesState::Wake`.
    devices_state: DevicesState,
}
1860
1861impl<'a> DeviceSleepGuard<'a> {
1862    fn new(device_control_tube: &'a Tube) -> anyhow::Result<Self> {
1863        device_control_tube
1864            .send(&DeviceControlCommand::GetDevicesState)
1865            .context("send command to devices control socket")?;
1866        let devices_state = match device_control_tube
1867            .recv()
1868            .context("receive from devices control socket")?
1869        {
1870            VmResponse::DevicesState(state) => state,
1871            resp => bail!("failed to get devices state. Unexpected behavior: {}", resp),
1872        };
1873        if let DevicesState::Wake = devices_state {
1874            device_control_tube
1875                .send(&DeviceControlCommand::SleepDevices)
1876                .context("send command to devices control socket")?;
1877            match device_control_tube
1878                .recv()
1879                .context("receive from devices control socket")?
1880            {
1881                VmResponse::Ok => (),
1882                resp => bail!("device sleep failed: {}", resp),
1883            }
1884        }
1885        Ok(Self {
1886            device_control_tube,
1887            devices_state,
1888        })
1889    }
1890}
1891
1892impl Drop for DeviceSleepGuard<'_> {
1893    fn drop(&mut self) {
1894        if let DevicesState::Wake = self.devices_state {
1895            if let Err(e) = self
1896                .device_control_tube
1897                .send(&DeviceControlCommand::WakeDevices)
1898            {
1899                panic!("failed to request device wake after snapshot: {e}");
1900            }
1901            match self.device_control_tube.recv() {
1902                Ok(VmResponse::Ok) => (),
1903                Ok(resp) => panic!("unexpected response to device wake request: {resp}"),
1904                Err(e) => panic!("failed to get reply for device wake request: {e}"),
1905            }
1906        }
1907    }
1908}
1909
1910impl VmRequest {
1911    /// Executes this request on the given Vm and other mutable state.
1912    ///
1913    /// This does not return a result, instead encapsulating the success or failure in a
1914    /// `VmResponse` with the intended purpose of sending the response back over the  socket that
1915    /// received this `VmRequest`.
1916    ///
1917    /// `suspended_pvclock_state`: If the hypervisor has its own pvclock (not the same as
1918    /// virtio-pvclock) and the VM is suspended (not just the vCPUs, but the full VM), then
1919    /// `suspended_pvclock_state` will be used to store the ClockState saved just after the vCPUs
1920    /// were suspended. It is important that we save the value right after the vCPUs are suspended
1921    /// and restore it right before the vCPUs are resumed (instead of, more naturally, during the
1922    /// snapshot/restore steps) because the pvclock continues to tick even when the vCPUs are
1923    /// suspended.
1924    #[allow(unused_variables)]
1925    pub fn execute(
1926        &self,
1927        vm: &impl Vm,
1928        disk_host_tubes: &[Tube],
1929        snd_host_tubes: &[Tube],
1930        pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
1931        gpu_control_tube: Option<&Tube>,
1932        usb_control_tube: Option<&Tube>,
1933        bat_control: &mut Option<BatControl>,
1934        kick_vcpus: impl Fn(VcpuControl),
1935        #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl),
1936        force_s2idle: bool,
1937        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
1938        device_control_tube: &Tube,
1939        vcpu_size: usize,
1940        irq_handler_control: &Tube,
1941        snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
1942        suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
1943    ) -> VmResponse {
1944        match self {
1945            VmRequest::Exit => {
1946                panic!("VmRequest::Exit should be handled by the platform run loop");
1947            }
1948            VmRequest::Powerbtn => {
1949                if let Some(pm) = pm {
1950                    pm.lock().pwrbtn_evt();
1951                    VmResponse::Ok
1952                } else {
1953                    error!("{:#?} not supported", *self);
1954                    VmResponse::Err(SysError::new(ENOTSUP))
1955                }
1956            }
1957            VmRequest::Sleepbtn => {
1958                if let Some(pm) = pm {
1959                    pm.lock().slpbtn_evt();
1960                    VmResponse::Ok
1961                } else {
1962                    error!("{:#?} not supported", *self);
1963                    VmResponse::Err(SysError::new(ENOTSUP))
1964                }
1965            }
1966            VmRequest::Rtc { clear_evt } => {
1967                if let Some(pm) = pm.as_ref() {
1968                    match clear_evt.try_clone() {
1969                        Ok(clear_evt) => {
1970                            // RTC event will asynchronously trigger wakeup.
1971                            pm.lock().rtc_evt(clear_evt);
1972                            VmResponse::Ok
1973                        }
1974                        Err(err) => {
1975                            error!("Error cloning clear_evt: {:?}", err);
1976                            VmResponse::Err(SysError::new(EIO))
1977                        }
1978                    }
1979                } else {
1980                    error!("{:#?} not supported", *self);
1981                    VmResponse::Err(SysError::new(ENOTSUP))
1982                }
1983            }
1984            VmRequest::SuspendVcpus => {
1985                if !force_s2idle {
1986                    kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1987                    let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
1988                        Ok(state) => state,
1989                        Err(e) => {
1990                            error!("failed to get vcpu state: {e}");
1991                            return VmResponse::Err(SysError::new(EIO));
1992                        }
1993                    };
1994                    if current_mode != VmRunMode::Suspending {
1995                        error!("vCPUs failed to all suspend.");
1996                        return VmResponse::Err(SysError::new(EIO));
1997                    }
1998                }
1999                VmResponse::Ok
2000            }
2001            VmRequest::ResumeVcpus => {
2002                if let Err(e) = device_control_tube.send(&DeviceControlCommand::GetDevicesState) {
2003                    error!("failed to send GetDevicesState: {}", e);
2004                    return VmResponse::Err(SysError::new(EIO));
2005                }
2006                let devices_state = match device_control_tube.recv() {
2007                    Ok(VmResponse::DevicesState(state)) => state,
2008                    Ok(resp) => {
2009                        error!("failed to get devices state. Unexpected behavior: {}", resp);
2010                        return VmResponse::Err(SysError::new(EINVAL));
2011                    }
2012                    Err(e) => {
2013                        error!("failed to get devices state. Unexpected behavior: {}", e);
2014                        return VmResponse::Err(SysError::new(EINVAL));
2015                    }
2016                };
2017                if let DevicesState::Sleep = devices_state {
2018                    error!("Trying to wake Vcpus while Devices are asleep. Did you mean to use `crosvm resume --full`?");
2019                    return VmResponse::Err(SysError::new(EINVAL));
2020                }
2021
2022                if force_s2idle {
2023                    // During resume also emulate powerbtn event which will allow to wakeup fully
2024                    // suspended guest.
2025                    if let Some(pm) = pm {
2026                        pm.lock().pwrbtn_evt();
2027                    } else {
2028                        error!("triggering power btn during resume not supported");
2029                        return VmResponse::Err(SysError::new(ENOTSUP));
2030                    }
2031                }
2032
2033                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2034                VmResponse::Ok
2035            }
2036            VmRequest::Swap(SwapCommand::Enable) => {
2037                #[cfg(feature = "swap")]
2038                if let Some(swap_controller) = swap_controller {
2039                    // Suspend all vcpus and devices while vmm-swap is enabling (move the guest
2040                    // memory contents to the staging memory) to guarantee no processes other than
2041                    // the swap monitor process access the guest memory.
2042                    let _vcpu_guard = match VcpuSuspendGuard::new(&kick_vcpus, vcpu_size) {
2043                        Ok(guard) => guard,
2044                        Err(e) => {
2045                            error!("failed to suspend vcpus: {:?}", e);
2046                            return VmResponse::Err(SysError::new(EINVAL));
2047                        }
2048                    };
2049                    // TODO(b/253386409): Use `devices::Suspendable::sleep()` instead of sending
2050                    // `SIGSTOP` signal.
2051                    let _devices_guard = match swap_controller.suspend_devices() {
2052                        Ok(guard) => guard,
2053                        Err(e) => {
2054                            error!("failed to suspend devices: {:?}", e);
2055                            return VmResponse::Err(SysError::new(EINVAL));
2056                        }
2057                    };
2058
2059                    return match swap_controller.enable() {
2060                        Ok(()) => VmResponse::Ok,
2061                        Err(e) => {
2062                            error!("swap enable failed: {}", e);
2063                            VmResponse::Err(SysError::new(EINVAL))
2064                        }
2065                    };
2066                }
2067                VmResponse::Err(SysError::new(ENOTSUP))
2068            }
2069            VmRequest::Swap(SwapCommand::Trim) => {
2070                #[cfg(feature = "swap")]
2071                if let Some(swap_controller) = swap_controller {
2072                    return match swap_controller.trim() {
2073                        Ok(()) => VmResponse::Ok,
2074                        Err(e) => {
2075                            error!("swap trim failed: {}", e);
2076                            VmResponse::Err(SysError::new(EINVAL))
2077                        }
2078                    };
2079                }
2080                VmResponse::Err(SysError::new(ENOTSUP))
2081            }
2082            VmRequest::Swap(SwapCommand::SwapOut) => {
2083                #[cfg(feature = "swap")]
2084                if let Some(swap_controller) = swap_controller {
2085                    return match swap_controller.swap_out() {
2086                        Ok(()) => VmResponse::Ok,
2087                        Err(e) => {
2088                            error!("swap out failed: {}", e);
2089                            VmResponse::Err(SysError::new(EINVAL))
2090                        }
2091                    };
2092                }
2093                VmResponse::Err(SysError::new(ENOTSUP))
2094            }
2095            VmRequest::Swap(SwapCommand::Disable {
2096                #[cfg(feature = "swap")]
2097                slow_file_cleanup,
2098                ..
2099            }) => {
2100                #[cfg(feature = "swap")]
2101                if let Some(swap_controller) = swap_controller {
2102                    return match swap_controller.disable(*slow_file_cleanup) {
2103                        Ok(()) => VmResponse::Ok,
2104                        Err(e) => {
2105                            error!("swap disable failed: {}", e);
2106                            VmResponse::Err(SysError::new(EINVAL))
2107                        }
2108                    };
2109                }
2110                VmResponse::Err(SysError::new(ENOTSUP))
2111            }
2112            VmRequest::Swap(SwapCommand::Status) => {
2113                #[cfg(feature = "swap")]
2114                if let Some(swap_controller) = swap_controller {
2115                    return match swap_controller.status() {
2116                        Ok(status) => VmResponse::SwapStatus(status),
2117                        Err(e) => {
2118                            error!("swap status failed: {}", e);
2119                            VmResponse::Err(SysError::new(EINVAL))
2120                        }
2121                    };
2122                }
2123                VmResponse::Err(SysError::new(ENOTSUP))
2124            }
2125            VmRequest::SuspendVm => {
2126                info!("Starting crosvm suspend");
2127                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
2128                let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
2129                    Ok(state) => state,
2130                    Err(e) => {
2131                        error!("failed to get vcpu state: {e}");
2132                        return VmResponse::Err(SysError::new(EIO));
2133                    }
2134                };
2135                if current_mode != VmRunMode::Suspending {
2136                    error!("vCPUs failed to all suspend.");
2137                    return VmResponse::Err(SysError::new(EIO));
2138                }
2139                // Snapshot the pvclock ASAP after stopping vCPUs.
2140                if vm.check_capability(VmCap::PvClock) {
2141                    if suspended_pvclock_state.is_none() {
2142                        *suspended_pvclock_state = Some(match vm.get_pvclock() {
2143                            Ok(x) => x,
2144                            Err(e) => {
2145                                error!("suspend_pvclock failed: {e:?}");
2146                                return VmResponse::Err(SysError::new(EIO));
2147                            }
2148                        });
2149                    }
2150                }
2151                if let Err(e) = device_control_tube
2152                    .send(&DeviceControlCommand::SleepDevices)
2153                    .context("send command to devices control socket")
2154                {
2155                    error!("{:?}", e);
2156                    return VmResponse::Err(SysError::new(EIO));
2157                };
2158                match device_control_tube
2159                    .recv()
2160                    .context("receive from devices control socket")
2161                {
2162                    Ok(VmResponse::Ok) => {
2163                        info!("Finished crosvm suspend successfully");
2164                        VmResponse::Ok
2165                    }
2166                    Ok(resp) => {
2167                        error!("device sleep failed: {}", resp);
2168                        VmResponse::Err(SysError::new(EIO))
2169                    }
2170                    Err(e) => {
2171                        error!("receive from devices control socket: {:?}", e);
2172                        VmResponse::Err(SysError::new(EIO))
2173                    }
2174                }
2175            }
2176            VmRequest::ResumeVm => {
2177                info!("Starting crosvm resume");
2178                if let Err(e) = device_control_tube
2179                    .send(&DeviceControlCommand::WakeDevices)
2180                    .context("send command to devices control socket")
2181                {
2182                    error!("{:?}", e);
2183                    return VmResponse::Err(SysError::new(EIO));
2184                };
2185                match device_control_tube
2186                    .recv()
2187                    .context("receive from devices control socket")
2188                {
2189                    Ok(VmResponse::Ok) => {
2190                        info!("Finished crosvm resume successfully");
2191                    }
2192                    Ok(resp) => {
2193                        error!("device wake failed: {}", resp);
2194                        return VmResponse::Err(SysError::new(EIO));
2195                    }
2196                    Err(e) => {
2197                        error!("receive from devices control socket: {:?}", e);
2198                        return VmResponse::Err(SysError::new(EIO));
2199                    }
2200                }
2201                // Resume the pvclock as late as possible before starting vCPUs.
2202                if vm.check_capability(VmCap::PvClock) {
2203                    // If None, then we aren't suspended, which is a valid case.
2204                    if let Some(x) = suspended_pvclock_state {
2205                        if let Err(e) = vm.set_pvclock(x) {
2206                            error!("resume_pvclock failed: {e:?}");
2207                            return VmResponse::Err(SysError::new(EIO));
2208                        }
2209                    }
2210                }
2211                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2212                VmResponse::Ok
2213            }
2214            VmRequest::Gpe { gpe, clear_evt } => {
2215                if let Some(pm) = pm.as_ref() {
2216                    match clear_evt.as_ref().map(|e| e.try_clone()).transpose() {
2217                        Ok(clear_evt) => {
2218                            pm.lock().gpe_evt(*gpe, clear_evt);
2219                            VmResponse::Ok
2220                        }
2221                        Err(err) => {
2222                            error!("Error cloning clear_evt: {:?}", err);
2223                            VmResponse::Err(SysError::new(EIO))
2224                        }
2225                    }
2226                } else {
2227                    error!("{:#?} not supported", *self);
2228                    VmResponse::Err(SysError::new(ENOTSUP))
2229                }
2230            }
2231            VmRequest::PciPme(requester_id) => {
2232                if let Some(pm) = pm.as_ref() {
2233                    pm.lock().pme_evt(*requester_id);
2234                    VmResponse::Ok
2235                } else {
2236                    error!("{:#?} not supported", *self);
2237                    VmResponse::Err(SysError::new(ENOTSUP))
2238                }
2239            }
2240            VmRequest::MakeRT => {
2241                kick_vcpus(VcpuControl::MakeRT);
2242                VmResponse::Ok
2243            }
2244            #[cfg(feature = "balloon")]
2245            VmRequest::BalloonCommand(_) => unreachable!("Should be handled with BalloonTube"),
2246            VmRequest::DiskCommand {
2247                disk_index,
2248                ref command,
2249            } => match &disk_host_tubes.get(*disk_index) {
2250                Some(tube) => handle_disk_command(command, tube),
2251                None => VmResponse::Err(SysError::new(ENODEV)),
2252            },
2253            #[cfg(feature = "gpu")]
2254            VmRequest::GpuCommand(ref cmd) => match gpu_control_tube {
2255                Some(gpu_control) => {
2256                    let res = gpu_control.send(cmd);
2257                    if let Err(e) = res {
2258                        error!("fail to send command to gpu control socket: {}", e);
2259                        return VmResponse::Err(SysError::new(EIO));
2260                    }
2261                    match gpu_control.recv() {
2262                        Ok(response) => VmResponse::GpuResponse(response),
2263                        Err(e) => {
2264                            error!("fail to recv command from gpu control socket: {}", e);
2265                            VmResponse::Err(SysError::new(EIO))
2266                        }
2267                    }
2268                }
2269                None => {
2270                    error!("gpu control is not enabled in crosvm");
2271                    VmResponse::Err(SysError::new(EIO))
2272                }
2273            },
2274            VmRequest::UsbCommand(ref cmd) => {
2275                let usb_control_tube = match usb_control_tube {
2276                    Some(t) => t,
2277                    None => {
2278                        error!("attempted to execute USB request without control tube");
2279                        return VmResponse::Err(SysError::new(ENODEV));
2280                    }
2281                };
2282                let res = usb_control_tube.send(cmd);
2283                if let Err(e) = res {
2284                    error!("fail to send command to usb control socket: {}", e);
2285                    return VmResponse::Err(SysError::new(EIO));
2286                }
2287                match usb_control_tube.recv() {
2288                    Ok(response) => VmResponse::UsbResponse(response),
2289                    Err(e) => {
2290                        error!("fail to recv command from usb control socket: {}", e);
2291                        VmResponse::Err(SysError::new(EIO))
2292                    }
2293                }
2294            }
2295            VmRequest::BatCommand(type_, ref cmd) => {
2296                match bat_control {
2297                    Some(battery) => {
2298                        if battery.type_ != *type_ {
2299                            error!("ignored battery command due to battery type: expected {:?}, got {:?}", battery.type_, type_);
2300                            return VmResponse::Err(SysError::new(EINVAL));
2301                        }
2302
2303                        let res = battery.control_tube.send(cmd);
2304                        if let Err(e) = res {
2305                            error!("fail to send command to bat control socket: {}", e);
2306                            return VmResponse::Err(SysError::new(EIO));
2307                        }
2308
2309                        match battery.control_tube.recv() {
2310                            Ok(response) => VmResponse::BatResponse(response),
2311                            Err(e) => {
2312                                error!("fail to recv command from bat control socket: {}", e);
2313                                VmResponse::Err(SysError::new(EIO))
2314                            }
2315                        }
2316                    }
2317                    None => VmResponse::BatResponse(BatControlResult::NoBatDevice),
2318                }
2319            }
2320            #[cfg(feature = "audio")]
2321            VmRequest::SndCommand(ref cmd) => match cmd {
2322                SndControlCommand::MuteAll(muted) => {
2323                    for tube in snd_host_tubes {
2324                        let res = tube.send(&SndControlCommand::MuteAll(*muted));
2325                        if let Err(e) = res {
2326                            error!("fail to send command to snd control socket: {}", e);
2327                            return VmResponse::Err(SysError::new(EIO));
2328                        }
2329
2330                        match tube.recv() {
2331                            Ok(VmResponse::Ok) => {
2332                                debug!("device is successfully muted");
2333                            }
2334                            Ok(resp) => {
2335                                error!("mute failed: {}", resp);
2336                                return VmResponse::ErrString("fail to mute the device".to_owned());
2337                            }
2338                            Err(e) => return VmResponse::Err(SysError::new(EIO)),
2339                        }
2340                    }
2341                    VmResponse::Ok
2342                }
2343            },
2344            VmRequest::HotPlugVfioCommand { device: _, add: _ } => VmResponse::Ok,
2345            #[cfg(feature = "pci-hotplug")]
2346            VmRequest::HotPlugNetCommand(ref _net_cmd) => {
2347                VmResponse::ErrString("hot plug not supported".to_owned())
2348            }
2349            VmRequest::Snapshot(SnapshotCommand::Take {
2350                ref snapshot_path,
2351                compress_memory,
2352                encrypt,
2353            }) => {
2354                info!("Starting crosvm snapshot");
2355                match do_snapshot(
2356                    snapshot_path.to_path_buf(),
2357                    kick_vcpus,
2358                    irq_handler_control,
2359                    device_control_tube,
2360                    vcpu_size,
2361                    snapshot_irqchip,
2362                    *compress_memory,
2363                    *encrypt,
2364                    suspended_pvclock_state,
2365                    vm,
2366                ) {
2367                    Ok(()) => {
2368                        info!("Finished crosvm snapshot successfully");
2369                        VmResponse::Ok
2370                    }
2371                    Err(e) => {
2372                        error!("failed to handle snapshot: {:?}", e);
2373                        VmResponse::Err(SysError::new(EIO))
2374                    }
2375                }
2376            }
2377            VmRequest::RegisterListener {
2378                socket_addr: _,
2379                event: _,
2380            } => VmResponse::Ok,
2381            VmRequest::UnregisterListener {
2382                socket_addr: _,
2383                event: _,
2384            } => VmResponse::Ok,
2385            VmRequest::Unregister { socket_addr: _ } => VmResponse::Ok,
2386            VmRequest::VcpuPidTid => unreachable!(),
2387            VmRequest::Throttle(_, _) => unreachable!(),
2388            VmRequest::GetVmDescriptor => {
2389                let vm_fd = match vm.try_clone_descriptor() {
2390                    Ok(vm_fd) => vm_fd,
2391                    Err(e) => {
2392                        error!("failed to get vm_fd: {:?}", e);
2393                        return VmResponse::Err(e);
2394                    }
2395                };
2396                VmResponse::VmDescriptor {
2397                    hypervisor: vm.hypervisor_kind(),
2398                    vm_fd,
2399                }
2400            }
2401            VmRequest::RegisterMemory { .. } => unreachable!(),
2402            VmRequest::UnregisterMemory { .. } => unreachable!(),
2403        }
2404    }
2405}
2406
2407/// Snapshot the VM to file at `snapshot_path`
2408fn do_snapshot(
2409    snapshot_path: PathBuf,
2410    kick_vcpus: impl Fn(VcpuControl),
2411    irq_handler_control: &Tube,
2412    device_control_tube: &Tube,
2413    vcpu_size: usize,
2414    snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
2415    compress_memory: bool,
2416    encrypt: bool,
2417    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2418    vm: &impl Vm,
2419) -> anyhow::Result<()> {
2420    let snapshot_start = Instant::now();
2421
2422    let _vcpu_guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size)?;
2423    let _device_guard = DeviceSleepGuard::new(device_control_tube)?;
2424
2425    // We want to flush all pending IRQs to the interrupt controller. There are two cases:
2426    //
2427    // MSIs: these are directly delivered to the interrupt controller.
2428    // We must verify the handler thread cycles once to deliver these interrupts.
2429    //
2430    // Legacy interrupts: in the case of a split IRQ chip, these interrupts may
2431    // flow through the userspace IOAPIC. If the hypervisor does not support
2432    // irqfds (e.g. WHPX), a single iteration will only flush the IRQ to the
2433    // IOAPIC. The underlying MSI will be asserted at this point, but if the
2434    // IRQ handler doesn't run another iteration, it won't be delivered to the
2435    // interrupt controller. This is why we cycle the handler thread twice (doing so
2436    // ensures we process the underlying MSI).
2437    //
2438    // We can handle both of these cases by iterating until there are no tokens
2439    // serviced on the requested iteration. Note that in the legacy case, this
2440    // ensures at least two iterations.
2441    //
2442    // Note: within CrosVM, *all* interrupts are eventually converted into the
2443    // same mechanicism that MSIs use. This is why we say "underlying" MSI for
2444    // a legacy IRQ.
2445    {
2446        let mut flush_attempts = 0;
2447        loop {
2448            irq_handler_control
2449                .send(&IrqHandlerRequest::WakeAndNotifyIteration)
2450                .context("failed to send flush command to IRQ handler thread")?;
2451            let resp = irq_handler_control
2452                .recv()
2453                .context("failed to recv flush response from IRQ handler thread")?;
2454            match resp {
2455                IrqHandlerResponse::HandlerIterationComplete(tokens_serviced) => {
2456                    if tokens_serviced == 0 {
2457                        break;
2458                    }
2459                }
2460                _ => bail!("received unexpected reply from IRQ handler: {:?}", resp),
2461            }
2462            flush_attempts += 1;
2463            if flush_attempts > EXPECTED_MAX_IRQ_FLUSH_ITERATIONS {
2464                warn!(
2465                    "flushing IRQs for snapshot may be stalled after iteration {}, expected <= {}
2466                      iterations",
2467                    flush_attempts, EXPECTED_MAX_IRQ_FLUSH_ITERATIONS
2468                );
2469            }
2470        }
2471        info!("flushed IRQs in {} iterations", flush_attempts);
2472    }
2473    let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
2474
2475    // Snapshot hypervisor's paravirtualized clock.
2476    snapshot_writer.write_fragment("pvclock", &AnySnapshot::to_any(suspended_pvclock_state)?)?;
2477
2478    // Snapshot Vcpus
2479    info!("VCPUs snapshotting...");
2480    let (send_chan, recv_chan) = mpsc::channel();
2481    kick_vcpus(VcpuControl::Snapshot(
2482        snapshot_writer.add_namespace("vcpu")?,
2483        send_chan,
2484    ));
2485    // Validate all Vcpus snapshot successfully
2486    for _ in 0..vcpu_size {
2487        recv_chan
2488            .recv()
2489            .context("Failed to recv Vcpu snapshot response")?
2490            .context("Failed to snapshot Vcpu")?;
2491    }
2492    info!("VCPUs snapshotted.");
2493
2494    // Snapshot irqchip
2495    info!("Snapshotting irqchip...");
2496    let irqchip_snap = snapshot_irqchip()?;
2497    snapshot_writer
2498        .write_fragment("irqchip", &irqchip_snap)
2499        .context("Failed to write irqchip state")?;
2500    info!("Snapshotted irqchip.");
2501
2502    // Snapshot memory
2503    {
2504        let mem_snap_start = Instant::now();
2505        // Use 64MB chunks when writing the memory snapshot (if encryption is used).
2506        const MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES: usize = 1024 * 1024 * 64;
2507        // SAFETY:
2508        // VM & devices are stopped.
2509        let guest_memory_metadata = unsafe {
2510            vm.get_memory()
2511                .snapshot(
2512                    &mut snapshot_writer.raw_fragment_with_chunk_size(
2513                        "mem",
2514                        MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES,
2515                    )?,
2516                    compress_memory,
2517                )
2518                .context("failed to snapshot memory")?
2519        };
2520        snapshot_writer.write_fragment("mem_metadata", &guest_memory_metadata)?;
2521
2522        let mem_snap_duration_ms = mem_snap_start.elapsed().as_millis();
2523        info!(
2524            "snapshot: memory snapshotted {}MB in {}ms",
2525            vm.get_memory().memory_size() / 1024 / 1024,
2526            mem_snap_duration_ms
2527        );
2528        metrics::log_metric_with_details(
2529            metrics::MetricEventType::SnapshotSaveMemoryLatency,
2530            mem_snap_duration_ms as i64,
2531            &metrics_events::RecordDetails {},
2532        );
2533    }
2534    // Snapshot devices
2535    info!("Devices snapshotting...");
2536    device_control_tube
2537        .send(&DeviceControlCommand::SnapshotDevices { snapshot_writer })
2538        .context("send command to devices control socket")?;
2539    let resp: VmResponse = device_control_tube
2540        .recv()
2541        .context("receive from devices control socket")?;
2542    if !matches!(resp, VmResponse::Ok) {
2543        bail!("unexpected SnapshotDevices response: {resp}");
2544    }
2545    info!("Devices snapshotted.");
2546
2547    let snap_duration_ms = snapshot_start.elapsed().as_millis();
2548    info!(
2549        "snapshot: completed snapshot in {}ms; VM mem size: {}MB",
2550        snap_duration_ms,
2551        vm.get_memory().memory_size() / 1024 / 1024,
2552    );
2553    metrics::log_metric_with_details(
2554        metrics::MetricEventType::SnapshotSaveOverallLatency,
2555        snap_duration_ms as i64,
2556        &metrics_events::RecordDetails {},
2557    );
2558    Ok(())
2559}
2560
2561/// Restore the VM to the snapshot at `restore_path`.
2562///
2563/// Same as `VmRequest::execute` with a `VmRequest::Restore`. Exposed as a separate function
2564/// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
2565pub fn do_restore(
2566    restore_path: &Path,
2567    kick_vcpus: impl Fn(VcpuControl),
2568    kick_vcpu: impl Fn(VcpuControl, usize),
2569    irq_handler_control: &Tube,
2570    device_control_tube: &Tube,
2571    vcpu_size: usize,
2572    mut restore_irqchip: impl FnMut(AnySnapshot) -> anyhow::Result<()>,
2573    require_encrypted: bool,
2574    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2575    vm: &impl Vm,
2576) -> anyhow::Result<()> {
2577    let restore_start = Instant::now();
2578    let _guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size);
2579    let _devices_guard = DeviceSleepGuard::new(device_control_tube)?;
2580
2581    let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
2582
2583    // Restore hypervisor's paravirtualized clock.
2584    *suspended_pvclock_state = snapshot_reader.read_fragment("pvclock")?;
2585
2586    // Restore IrqChip
2587    let irq_snapshot: AnySnapshot = snapshot_reader.read_fragment("irqchip")?;
2588    restore_irqchip(irq_snapshot)?;
2589
2590    // Restore Vcpu(s)
2591    let vcpu_snapshot_reader = snapshot_reader.namespace("vcpu")?;
2592    let vcpu_snapshot_count = vcpu_snapshot_reader.list_fragments()?.len();
2593    if vcpu_snapshot_count != vcpu_size {
2594        bail!(
2595            "bad cpu count in snapshot: expected={} got={}",
2596            vcpu_size,
2597            vcpu_snapshot_count,
2598        );
2599    }
2600    #[cfg(target_arch = "x86_64")]
2601    let host_tsc_reference_moment = {
2602        // SAFETY: rdtsc takes no arguments.
2603        unsafe { _rdtsc() }
2604    };
2605    let (send_chan, recv_chan) = mpsc::channel();
2606    for vcpu_id in 0..vcpu_size {
2607        kick_vcpu(
2608            VcpuControl::Restore(VcpuRestoreRequest {
2609                result_sender: send_chan.clone(),
2610                snapshot_reader: vcpu_snapshot_reader.clone(),
2611                #[cfg(target_arch = "x86_64")]
2612                host_tsc_reference_moment,
2613            }),
2614            vcpu_id,
2615        );
2616    }
2617    for _ in 0..vcpu_size {
2618        recv_chan
2619            .recv()
2620            .context("Failed to recv restore response")?
2621            .context("Failed to restore vcpu")?;
2622    }
2623
2624    // Restore Memory
2625    {
2626        let mem_restore_start = Instant::now();
2627        let guest_memory_metadata = snapshot_reader.read_fragment("mem_metadata")?;
2628        // SAFETY:
2629        // VM & devices are stopped.
2630        unsafe {
2631            vm.get_memory().restore(
2632                guest_memory_metadata,
2633                &mut snapshot_reader.raw_fragment("mem")?,
2634            )?
2635        };
2636        let mem_restore_duration_ms = mem_restore_start.elapsed().as_millis();
2637        info!(
2638            "snapshot: memory restored {}MB in {}ms",
2639            vm.get_memory().memory_size() / 1024 / 1024,
2640            mem_restore_duration_ms
2641        );
2642        metrics::log_metric_with_details(
2643            metrics::MetricEventType::SnapshotRestoreMemoryLatency,
2644            mem_restore_duration_ms as i64,
2645            &metrics_events::RecordDetails {},
2646        );
2647    }
2648    // Restore devices
2649    device_control_tube
2650        .send(&DeviceControlCommand::RestoreDevices {
2651            snapshot_reader: snapshot_reader.clone(),
2652        })
2653        .context("send restore devices command to devices control socket")?;
2654    let resp: VmResponse = device_control_tube
2655        .recv()
2656        .context("receive from devices control socket")?;
2657    if !matches!(resp, VmResponse::Ok) {
2658        bail!("unexpected RestoreDevices response: {resp}");
2659    }
2660
2661    // refresh the IRQ tokens.
2662    {
2663        irq_handler_control
2664            .send(&IrqHandlerRequest::RefreshIrqEventTokens)
2665            .context("failed to send refresh irq event token command to IRQ handler thread")?;
2666        let resp: IrqHandlerResponse = irq_handler_control
2667            .recv()
2668            .context("failed to recv refresh response from IRQ handler thread")?;
2669        if !matches!(resp, IrqHandlerResponse::IrqEventTokenRefreshComplete) {
2670            bail!(
2671                "received unexpected reply from IRQ handler thread: {:?}",
2672                resp
2673            );
2674        }
2675    }
2676
2677    let restore_duration_ms = restore_start.elapsed().as_millis();
2678    info!(
2679        "snapshot: completed restore in {}ms; mem size: {}",
2680        restore_duration_ms,
2681        vm.get_memory().memory_size(),
2682    );
2683
2684    metrics::log_metric_with_details(
2685        metrics::MetricEventType::SnapshotRestoreOverallLatency,
2686        restore_duration_ms as i64,
2687        &metrics_events::RecordDetails {},
2688    );
2689    Ok(())
2690}
2691
/// Alias for `hypervisor::HypervisorKind`, exposed from this crate; it is the type of the
/// `hypervisor` field of `VmResponse::VmDescriptor`.
pub type HypervisorKind = hypervisor::HypervisorKind;
2693
/// Indication of success or failure of a `VmRequest`.
///
/// Success is usually indicated by `VmResponse::Ok` unless there is data associated with the
/// response, in which case one of the data-carrying variants is returned instead.
#[derive(Serialize, Deserialize, Debug)]
#[must_use]
pub enum VmResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution; carries a `SysError`.
    Err(SysError),
    /// Indicates the request encountered some error during execution; carries a free-form
    /// message instead of a `SysError`.
    ErrString(String),
    /// The memory was registered into guest address space in memory slot number `slot`.
    RegisterMemory { slot: u32 },
    /// Variant of the register memory but with region_id.
    RegisterMemory2 { region_id: u64 },
    /// Results of balloon control commands.
    #[cfg(feature = "balloon")]
    BalloonStats {
        stats: balloon_control::BalloonStats,
        balloon_actual: u64,
    },
    /// Results of balloon WS-R command
    #[cfg(feature = "balloon")]
    BalloonWS {
        ws: balloon_control::BalloonWS,
        balloon_actual: u64,
    },
    /// Results of PCI hot plug
    #[cfg(feature = "pci-hotplug")]
    PciHotPlugResponse { bus: u8 },
    /// Results of usb control commands.
    UsbResponse(UsbControlResult),
    #[cfg(feature = "gpu")]
    /// Results of gpu control commands.
    GpuResponse(GpuControlResult),
    /// Results of battery control commands.
    BatResponse(BatControlResult),
    /// Results of swap status command.
    SwapStatus(SwapStatus),
    /// Gets the state of Devices (sleep/wake)
    DevicesState(DevicesState),
    /// Map of the Vcpu PID/TIDs
    VcpuPidTidResponse {
        pid_tid_map: BTreeMap<usize, (u32, u32)>,
    },
    /// Reply to `VmRequest::GetVmDescriptor`: the hypervisor kind together with a cloned
    /// descriptor of the VM.
    VmDescriptor {
        hypervisor: HypervisorKind,
        vm_fd: SafeDescriptor,
    },
}
2745
2746impl Display for VmResponse {
2747    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2748        use self::VmResponse::*;
2749
2750        match self {
2751            Ok => write!(f, "ok"),
2752            Err(e) => write!(f, "error: {e}"),
2753            ErrString(e) => write!(f, "error: {e}"),
2754            RegisterMemory { slot } => write!(f, "memory registered in slot {slot}"),
2755            RegisterMemory2 { region_id } => {
2756                write!(f, "memory registered in region id {region_id}")
2757            }
2758            #[cfg(feature = "balloon")]
2759            VmResponse::BalloonStats {
2760                stats,
2761                balloon_actual,
2762            } => {
2763                write!(
2764                    f,
2765                    "stats: {}\nballoon_actual: {}",
2766                    serde_json::to_string_pretty(&stats)
2767                        .unwrap_or_else(|_| "invalid_response".to_string()),
2768                    balloon_actual
2769                )
2770            }
2771            #[cfg(feature = "balloon")]
2772            VmResponse::BalloonWS { ws, balloon_actual } => {
2773                write!(
2774                    f,
2775                    "ws: {}, balloon_actual: {}",
2776                    serde_json::to_string_pretty(&ws)
2777                        .unwrap_or_else(|_| "invalid_response".to_string()),
2778                    balloon_actual,
2779                )
2780            }
2781            UsbResponse(result) => write!(f, "usb control request get result {result:?}"),
2782            #[cfg(feature = "pci-hotplug")]
2783            PciHotPlugResponse { bus } => write!(f, "pci hotplug bus {bus:?}"),
2784            #[cfg(feature = "gpu")]
2785            GpuResponse(result) => write!(f, "gpu control request result {result:?}"),
2786            BatResponse(result) => write!(f, "{result}"),
2787            SwapStatus(status) => {
2788                write!(
2789                    f,
2790                    "{}",
2791                    serde_json::to_string(&status)
2792                        .unwrap_or_else(|_| "invalid_response".to_string()),
2793                )
2794            }
2795            DevicesState(status) => write!(f, "devices status: {status:?}"),
2796            VcpuPidTidResponse { pid_tid_map } => write!(f, "vcpu pid tid map: {pid_tid_map:?}"),
2797            VmDescriptor { hypervisor, vm_fd } => {
2798                write!(f, "hypervisor: {hypervisor:?}, vm_fd: {vm_fd:?}")
2799            }
2800        }
2801    }
2802}
2803
/// Enum that allows remote control of a wait context (used between the Windows GpuDisplay & the
/// GPU worker).
#[derive(Serialize, Deserialize)]
pub enum ModifyWaitContext {
    /// Carries a `Descriptor`, which is (de)serialized through `with_as_descriptor`.
    ///
    /// NOTE(review): presumably asks the receiver to add this descriptor to its wait context;
    /// confirm against the consumer.
    Add(#[serde(with = "with_as_descriptor")] Descriptor),
}
2810
/// Error type for virtio-iommu VFIO requests.
///
/// The display text of each variant comes from its `#[error]` attribute.
/// NOTE(review): the code that produces these errors is outside this chunk.
#[sorted]
#[derive(Error, Debug)]
pub enum VirtioIOMMUVfioError {
    /// Displayed as "socket failed".
    #[error("socket failed")]
    SocketFailed,
    /// A `VirtioIOMMUResponse` that was not expected; the response is included in the message.
    #[error("unexpected response: {0}")]
    UnexpectedResponse(VirtioIOMMUResponse),
    /// A command that was not recognized; the payload is the command text.
    #[error("unknown command: `{0}`")]
    UnknownCommand(String),
    /// Wraps a `VirtioIOMMUVfioResult` and displays it verbatim.
    #[error("{0}")]
    VfioControl(VirtioIOMMUVfioResult),
}
2823
/// VFIO-related commands carried by `VirtioIOMMURequest::VfioCommand`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioCommand {
    /// Add the vfio device attached to virtio-iommu.
    VfioDeviceAdd {
        endpoint_addr: u32,
        wrapper_id: u32,
        /// Sent as a descriptor (serialized through `with_as_descriptor`).
        #[serde(with = "with_as_descriptor")]
        container: File,
    },
    /// Delete the vfio device attached to virtio-iommu.
    VfioDeviceDel {
        endpoint_addr: u32,
    },
    /// Map a dma-buf into vfio iommu table
    VfioDmabufMap {
        region_id: VmMemoryRegionId,
        gpa: u64,
        size: u64,
        dma_buf: SafeDescriptor,
    },
    /// Unmap a dma-buf from vfio iommu table
    VfioDmabufUnmap(VmMemoryRegionId),
}
2847
/// Outcome of a virtio-iommu VFIO command, carried by `VirtioIOMMUResponse::VfioResponse`.
///
/// The per-variant descriptions below mirror the text produced by this type's `Display` impl.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioResult {
    /// Displayed as "successfully".
    Ok,
    /// Not in the PCI ranges of virtio-iommu.
    NotInPCIRanges,
    /// No available vfio container.
    NoAvailableContainer,
    /// No such vfio device.
    NoSuchDevice,
    /// No such mapped dmabuf.
    NoSuchMappedDmabuf,
    /// Invalid parameters.
    InvalidParam,
}
2857
2858impl Display for VirtioIOMMUVfioResult {
2859    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2860        use self::VirtioIOMMUVfioResult::*;
2861
2862        match self {
2863            Ok => write!(f, "successfully"),
2864            NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
2865            NoAvailableContainer => write!(f, "no available vfio container"),
2866            NoSuchDevice => write!(f, "no such a vfio device"),
2867            NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
2868            InvalidParam => write!(f, "invalid parameters"),
2869        }
2870    }
2871}
2872
/// A request to the virtio-iommu process to perform some operations.
///
/// Unless otherwise noted, each request should expect a `VirtioIOMMUResponse::Ok` to be received on
/// success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMURequest {
    /// Command for vfio related operations; the payload is a `VirtioIOMMUVfioCommand`.
    VfioCommand(VirtioIOMMUVfioCommand),
}
2882
/// Indication of success or failure of a `VirtioIOMMURequest`.
///
/// Success is usually indicated by `VirtioIOMMUResponse::Ok` unless there is data associated with
/// the response.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution; carries a `SysError`.
    Err(SysError),
    /// Results for Vfio commands.
    VfioResponse(VirtioIOMMUVfioResult),
}
2896
2897impl Display for VirtioIOMMUResponse {
2898    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2899        use self::VirtioIOMMUResponse::*;
2900        match self {
2901            Ok => write!(f, "ok"),
2902            Err(e) => write!(f, "error: {e}"),
2903            VfioResponse(result) => write!(
2904                f,
2905                "The vfio-related virtio-iommu request got result: {result:?}"
2906            ),
2907        }
2908    }
2909}
2910
2911/// Send VirtioIOMMURequest without waiting for the response
2912pub fn virtio_iommu_request_async(
2913    iommu_control_tube: &Tube,
2914    req: &VirtioIOMMURequest,
2915) -> VirtioIOMMUResponse {
2916    match iommu_control_tube.send(&req) {
2917        Ok(_) => VirtioIOMMUResponse::Ok,
2918        Err(e) => {
2919            error!("virtio-iommu socket send failed: {:?}", e);
2920            VirtioIOMMUResponse::Err(SysError::last())
2921        }
2922    }
2923}
2924
/// Return type of `virtio_iommu_request`.
///
/// The error type is `()`. `virtio_iommu_request` always returns `Ok`: send and receive failures
/// are reported inside the `VirtioIOMMUResponse` as `VirtioIOMMUResponse::Err`.
pub type VirtioIOMMURequestResult = std::result::Result<VirtioIOMMUResponse, ()>;
2926
2927/// Send VirtioIOMMURequest and wait to get the response
2928pub fn virtio_iommu_request(
2929    iommu_control_tube: &Tube,
2930    req: &VirtioIOMMURequest,
2931) -> VirtioIOMMURequestResult {
2932    let response = match virtio_iommu_request_async(iommu_control_tube, req) {
2933        VirtioIOMMUResponse::Ok => match iommu_control_tube.recv() {
2934            Ok(response) => response,
2935            Err(e) => {
2936                error!("virtio-iommu socket recv failed: {:?}", e);
2937                VirtioIOMMUResponse::Err(SysError::last())
2938            }
2939        },
2940        resp => resp,
2941    };
2942    Ok(response)
2943}
2944
#[cfg(test)]
mod tests {
    use anyhow::anyhow;

    use super::*;

    /// Collects the message of every error in `error`'s chain, in chain order.
    fn chain_messages(error: &VmMemoryResponseError) -> Vec<String> {
        error.0.chain().map(|cause| cause.to_string()).collect()
    }

    /// A JSON round trip must keep both the top-level message and the whole context chain.
    #[test]
    fn vm_memory_response_error_should_serialize_and_deserialize_correctly() {
        let original: VmMemoryResponseError = anyhow!("root cause")
            .context("context 1")
            .context("context 2")
            .into();

        let json = serde_json::to_vec(&original).expect("should serialize to json successfully");
        let round_tripped: VmMemoryResponseError =
            serde_json::from_slice(&json).expect("should deserialize from json successfully");

        assert_eq!(original.0.to_string(), round_tripped.0.to_string());
        assert_eq!(chain_messages(&original), chain_messages(&round_tripped));
    }

    /// A flat representation with no messages at all must be rejected on deserialization.
    #[test]
    fn vm_memory_response_error_deserialization_should_handle_malformat_correctly() {
        let json = serde_json::to_vec(&FlatVmMemoryResponseError(vec![]))
            .expect("should serialize to json successfully");
        let outcome = serde_json::from_slice::<VmMemoryResponseError>(&json);
        outcome.expect_err("deserialize with 0 error messages should fail");
    }
}