vm_control/
lib.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Handles IPC for controlling the main VM process.
6//!
7//! The VM Control IPC protocol is synchronous, meaning that each `VmRequest` sent over a connection
8//! will receive a `VmResponse` for that request next time data is received over that connection.
9//!
10//! The wire message format is a little-endian C-struct of fixed size, along with a file descriptor
11//! if the request type expects one.
12
13pub mod api;
14
15mod any_control_tube;
16pub use any_control_tube::AnyControlTube;
17
18mod device_id;
19pub use device_id::DeviceId;
20pub use device_id::PciId;
21pub use device_id::PlatformDeviceId;
22
23#[cfg(feature = "gdb")]
24pub mod gdb;
25pub mod gpu;
26
27use base::debug;
28#[cfg(any(target_os = "android", target_os = "linux"))]
29use base::linux::MemoryMappingBuilderUnix;
30#[cfg(any(target_os = "android", target_os = "linux"))]
31use base::sys::call_with_extended_max_files;
32#[cfg(any(target_os = "android", target_os = "linux"))]
33use base::MemoryMappingArena;
34#[cfg(windows)]
35use base::MemoryMappingBuilderWindows;
36use hypervisor::BalloonEvent;
37use hypervisor::MemCacheType;
38use hypervisor::MemRegion;
39use snapshot::AnySnapshot;
40
41#[cfg(feature = "balloon")]
42mod balloon_tube;
43pub mod client;
44pub mod sys;
45
46#[cfg(target_arch = "x86_64")]
47use std::arch::x86_64::_rdtsc;
48use std::collections::BTreeMap;
49use std::collections::BTreeSet;
50use std::collections::HashMap;
51use std::convert::TryInto;
52use std::fmt;
53use std::fmt::Display;
54use std::fs::File;
55use std::path::Path;
56use std::path::PathBuf;
57use std::result::Result as StdResult;
58use std::str::FromStr;
59use std::sync::mpsc;
60use std::sync::Arc;
61use std::time::Instant;
62
63use anyhow::bail;
64use anyhow::Context;
65use base::error;
66use base::info;
67use base::warn;
68use base::with_as_descriptor;
69use base::AsRawDescriptor;
70use base::Descriptor;
71use base::Error as SysError;
72use base::Event;
73use base::ExternalMapping;
74use base::IntoRawDescriptor;
75use base::MappedRegion;
76use base::MemoryMappingBuilder;
77use base::MmapError;
78use base::Protection;
79use base::Result;
80use base::SafeDescriptor;
81use base::SharedMemory;
82use base::Tube;
83use hypervisor::Datamatch;
84use hypervisor::IoEventAddress;
85use hypervisor::IrqRoute;
86use hypervisor::IrqSource;
87pub use hypervisor::MemSlot;
88use hypervisor::Vm;
89use hypervisor::VmCap;
90use libc::EINVAL;
91use libc::EIO;
92use libc::ENODEV;
93use libc::ENOTSUP;
94use libc::ERANGE;
95#[cfg(feature = "registered_events")]
96use protos::registered_events;
97use remain::sorted;
98use resources::Alloc;
99use resources::SystemAllocator;
100use rutabaga_gfx::RutabagaDescriptor;
101use rutabaga_gfx::RutabagaFromRawDescriptor;
102use rutabaga_gfx::RutabagaGralloc;
103use rutabaga_gfx::RutabagaMappedRegion;
104use rutabaga_gfx::RutabagaMesaHandle;
105use rutabaga_gfx::VulkanInfo;
106use serde::de::Error;
107use serde::Deserialize;
108use serde::Serialize;
109use snapshot::SnapshotReader;
110use snapshot::SnapshotWriter;
111use swap::SwapStatus;
112use sync::Mutex;
113#[cfg(any(target_os = "android", target_os = "linux"))]
114pub use sys::FsMappingRequest;
115#[cfg(windows)]
116pub use sys::InitialAudioSessionState;
117#[cfg(any(target_os = "android", target_os = "linux"))]
118pub use sys::VmMemoryMappingRequest;
119#[cfg(any(target_os = "android", target_os = "linux"))]
120pub use sys::VmMemoryMappingResponse;
121use thiserror::Error;
122pub use vm_control_product::GpuSendToMain;
123pub use vm_control_product::GpuSendToService;
124pub use vm_control_product::ServiceSendToGpu;
125use vm_memory::GuestAddress;
126
127#[cfg(feature = "balloon")]
128pub use crate::balloon_tube::BalloonControlCommand;
129#[cfg(feature = "balloon")]
130pub use crate::balloon_tube::BalloonTube;
131#[cfg(feature = "gdb")]
132pub use crate::gdb::VcpuDebug;
133#[cfg(feature = "gdb")]
134pub use crate::gdb::VcpuDebugStatus;
135#[cfg(feature = "gdb")]
136pub use crate::gdb::VcpuDebugStatusMessage;
137use crate::gpu::GpuControlCommand;
138use crate::gpu::GpuControlResult;
139
/// Control the state of a particular VM CPU.
///
/// Several variants carry an `mpsc::Sender` so that the vCPU can report a result back to the
/// requester.
#[derive(Clone, Debug)]
pub enum VcpuControl {
    /// Debugger request for this vCPU. Only compiled in with the `gdb` feature.
    #[cfg(feature = "gdb")]
    Debug(VcpuDebug),
    /// Carries a `VmRunMode` for the vCPU.
    /// NOTE(review): presumably the mode the vCPU should switch to — confirm in the vCPU loop.
    RunState(VmRunMode),
    /// NOTE(review): presumably asks the vCPU thread to become real-time scheduled — confirm at
    /// the handling site.
    MakeRT,
    /// Request the current state of the vCPU. The result is sent back over the included channel.
    GetStates(mpsc::Sender<VmRunMode>),
    /// Request the vcpu write a snapshot of itself to the writer, then send a `Result` back over
    /// the channel after completion/failure.
    Snapshot(SnapshotWriter, mpsc::Sender<anyhow::Result<()>>),
    /// Request the vcpu restore itself as described by the `VcpuRestoreRequest`.
    Restore(VcpuRestoreRequest),
    /// Only available on Linux/Android.
    /// NOTE(review): the unit and meaning of the `u32` are not visible here — confirm.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    Throttle(u32),
}
156
/// Request to restore a Vcpu from a given snapshot, and report the results
/// back via the provided channel.
#[derive(Clone, Debug)]
pub struct VcpuRestoreRequest {
    /// Channel on which the outcome of the restore is reported.
    pub result_sender: mpsc::Sender<anyhow::Result<()>>,
    /// Reader for the snapshot the vCPU state is restored from.
    pub snapshot_reader: SnapshotReader,
    /// Only present on x86_64.
    /// NOTE(review): presumably a host TSC value sampled once so that all vCPUs restore against
    /// the same reference point — confirm with the code that fills this in.
    #[cfg(target_arch = "x86_64")]
    pub host_tsc_reference_moment: u64,
}
166
/// Mode of execution for the VM.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum VmRunMode {
    /// The VCPUs are running. This is the default run mode.
    #[default]
    Running,
    /// The VCPUs are suspending execution until the `Running` mode is set.
    Suspending,
    /// The VM is exiting all processes.
    Exiting,
    /// The VM is in a breakpoint, waiting for the debugger to continue.
    Breakpoint,
}

impl Display for VmRunMode {
    /// Writes the lowercase name of the run mode, e.g. `running`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            VmRunMode::Running => "running",
            VmRunMode::Suspending => "suspending",
            VmRunMode::Exiting => "exiting",
            VmRunMode::Breakpoint => "breakpoint",
        };
        f.write_str(label)
    }
}
193
/// Trait for devices that get notification on specific PCI PME.
///
/// Implementors must be `Send`. The default `notify` implementation does nothing.
pub trait PmeNotify: Send {
    /// Called with the requester id associated with the PME.
    /// NOTE(review): the exact encoding of `_requester_id` is not visible here — confirm.
    fn notify(&mut self, _requester_id: u16) {}
}
198
/// Hooks for power-management events.
///
/// Every method has a default implementation that does nothing, so implementors only override
/// the events they care about.
/// NOTE(review): the per-method descriptions below are inferred from the method names — confirm
/// against the implementors.
pub trait PmResource {
    /// Power button event.
    fn pwrbtn_evt(&mut self) {}
    /// Sleep button event.
    fn slpbtn_evt(&mut self) {}
    /// RTC event; `_clear_evt` is presumably signaled to clear it.
    fn rtc_evt(&mut self, _clear_evt: Event) {}
    /// General purpose event number `_gpe`, with an optional clear event.
    fn gpe_evt(&mut self, _gpe: u32, _clear_evt: Option<Event>) {}
    /// PCI PME event for `_requester_id`.
    fn pme_evt(&mut self, _requester_id: u16) {}
    /// Registers a `PmeNotify` device for the given bus.
    fn register_pme_notify_dev(&mut self, _bus: u8, _notify_dev: Arc<Mutex<dyn PmeNotify>>) {}
}
207
/// The maximum number of devices that can be listed in one `UsbControlCommand`.
///
/// It is the length of the `ports` array in `UsbControlCommand::ListDevice` and of the array in
/// `UsbControlResult::Devices`.
///
/// This value was set to be equal to `xhci_regs::MAX_PORTS` for convenience, but it is not
/// necessary for correctness. Importing that value directly would be overkill because it would
/// require adding a big dependency for a single const.
pub const USB_CONTROL_MAX_PORTS: usize = 16;
214
/// Commands that can be sent to a disk device.
#[derive(Serialize, Deserialize, Debug)]
pub enum DiskControlCommand {
    /// Resize a disk to `new_size` in bytes.
    Resize { new_size: u64 },
}
220
221impl Display for DiskControlCommand {
222    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
223        use self::DiskControlCommand::*;
224
225        match self {
226            Resize { new_size } => write!(f, "disk_resize {new_size}"),
227        }
228    }
229}
230
/// Result of a `DiskControlCommand`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum DiskControlResult {
    /// The command succeeded.
    Ok,
    /// The command failed with the given system error.
    Err(SysError),
}
236
/// Commands that modify a filesystem allowlist.
/// NOTE(review): which device consumes this and how paths are interpreted is not visible here.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum FsAllowlistCommand {
    /// Add `paths` to the allowlist.
    AddPaths { paths: Vec<PathBuf> },
    /// Remove `paths` from the allowlist.
    RemovePaths { paths: Vec<PathBuf> },
}
242
/// Response to an `FsAllowlistCommand`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum FsAllowlistResponse {
    /// The command succeeded.
    Ok,
    /// The command failed; the string carries the error description.
    Err(String),
}
248
/// Net control commands for adding and removing tap devices.
///
/// Only available with the `pci-hotplug` feature.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug)]
pub enum NetControlCommand {
    /// Add a tap device.
    /// NOTE(review): the `String` is presumably the tap interface name — confirm.
    AddTap(String),
    /// Remove a tap device.
    /// NOTE(review): the `u8` is presumably the PCI bus reported by `PciControlResult::AddOk` —
    /// confirm.
    RemoveTap(u8),
}
256
/// Commands for the USB controller.
///
/// `File` payloads are (de)serialized as descriptors through `with_as_descriptor`.
#[derive(Serialize, Deserialize, Debug)]
pub enum UsbControlCommand {
    /// Attach the host device represented by `file`.
    AttachDevice {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Attach a security key represented by `file`.
    AttachSecurityKey {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Detach the device on `port`.
    DetachDevice {
        port: u8,
    },
    /// List devices; `ports` holds up to `USB_CONTROL_MAX_PORTS` port numbers.
    /// NOTE(review): presumably these are the ports to query — confirm in the handler.
    ListDevice {
        ports: [u8; USB_CONTROL_MAX_PORTS],
    },
}
274
/// One entry in the device list returned by `UsbControlResult::Devices`.
///
/// The derived `Default` value has `port == 0`, which `valid()` reports as not valid.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)]
pub struct UsbControlAttachedDevice {
    /// Port number of the device; `0` marks an entry that is not valid.
    pub port: u8,
    /// Vendor id of the device.
    pub vendor_id: u16,
    /// Product id of the device.
    pub product_id: u16,
}
281
282impl UsbControlAttachedDevice {
283    pub fn valid(self) -> bool {
284        self.port != 0
285    }
286}
287
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
/// Result for hotplug and removal of PCI device.
///
/// Only available with the `pci-hotplug` feature.
pub enum PciControlResult {
    /// The device was added; `bus` is the bus number reported for it.
    AddOk { bus: u8 },
    /// The operation failed; the string describes the error.
    ErrString(String),
    /// The device was removed.
    RemoveOk,
}
297
#[cfg(feature = "pci-hotplug")]
impl Display for PciControlResult {
    /// Writes `add_ok <bus>`, `error: <message>` or `remove_ok`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            PciControlResult::AddOk { bus } => write!(f, "add_ok {}", bus),
            PciControlResult::ErrString(message) => write!(f, "error: {}", message),
            PciControlResult::RemoveOk => f.write_str("remove_ok"),
        }
    }
}
310
/// Result of a `UsbControlCommand`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum UsbControlResult {
    /// The command succeeded for the device on `port`.
    Ok { port: u8 },
    /// No port was available.
    NoAvailablePort,
    /// The requested device does not exist.
    NoSuchDevice,
    /// The requested port does not exist.
    NoSuchPort,
    /// The device could not be opened.
    FailedToOpenDevice,
    /// Device list. Entries for which `UsbControlAttachedDevice::valid` is false are skipped
    /// when this value is displayed.
    Devices([UsbControlAttachedDevice; USB_CONTROL_MAX_PORTS]),
    /// The host device could not be initialized.
    FailedToInitHostDevice,
}
321
322impl Display for UsbControlResult {
323    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
324        use self::UsbControlResult::*;
325
326        match self {
327            UsbControlResult::Ok { port } => write!(f, "ok {port}"),
328            NoAvailablePort => write!(f, "no_available_port"),
329            NoSuchDevice => write!(f, "no_such_device"),
330            NoSuchPort => write!(f, "no_such_port"),
331            FailedToOpenDevice => write!(f, "failed_to_open_device"),
332            Devices(devices) => {
333                write!(f, "devices")?;
334                for d in devices.iter().filter(|d| d.valid()) {
335                    write!(f, " {} {:04x} {:04x}", d.port, d.vendor_id, d.product_id)?;
336                }
337                std::result::Result::Ok(())
338            }
339            FailedToInitHostDevice => write!(f, "failed_to_init_host_device"),
340        }
341    }
342}
343
/// Commands for snapshot feature
#[derive(Serialize, Deserialize, Debug)]
pub enum SnapshotCommand {
    /// Take a snapshot of the VM.
    Take {
        /// Path the snapshot is written to.
        snapshot_path: PathBuf,
        /// NOTE(review): presumably enables compression of guest memory in the snapshot —
        /// confirm at the handling site.
        compress_memory: bool,
        /// NOTE(review): presumably enables encryption of the snapshot — confirm at the
        /// handling site.
        encrypt: bool,
    },
}
353
/// Commands for actions on devices and the devices control thread.
///
/// NOTE(review): the handler for these commands is not visible here; the per-variant notes are
/// inferred from the variant names and payload types — confirm against the devices thread.
#[derive(Serialize, Deserialize, Debug)]
pub enum DeviceControlCommand {
    /// Put devices to sleep.
    SleepDevices,
    /// Wake devices.
    WakeDevices,
    /// Snapshot devices into `snapshot_writer`.
    SnapshotDevices { snapshot_writer: SnapshotWriter },
    /// Restore devices from `snapshot_reader`.
    RestoreDevices { snapshot_reader: SnapshotReader },
    /// Query the state of the devices.
    GetDevicesState,
    /// Exit the devices control thread.
    Exit,
}
364
/// Commands to control the IRQ handler thread.
#[derive(Serialize, Deserialize)]
pub enum IrqHandlerRequest {
    /// Hands additional IRQ control tubes to the handler.
    ///
    /// No response is sent for this command.
    AddIrqControlTubes(Vec<Tube>),
    /// Refreshes the set of event tokens (Events) from the Irqchip that the IRQ
    /// handler waits on to forward IRQs to their final destination (e.g. via
    /// Irqchip::service_irq_event).
    ///
    /// If the set of tokens exposed by the Irqchip changes while the VM is
    /// running (such as for snapshot restore), this command must be sent
    /// otherwise the VM will not receive IRQs as expected.
    RefreshIrqEventTokens,
    /// NOTE(review): judging by `IrqHandlerResponse::HandlerIterationComplete`, this wakes the
    /// handler and makes it report how many tokens it serviced in that iteration — confirm in
    /// the handler thread.
    WakeAndNotifyIteration,
    /// No response is sent for this command.
    Exit,
}
382
// NOTE(review): no use of this constant is visible in this part of the file. Judging by the
// name, it is the expected upper bound on IRQ handler iterations needed to flush pending IRQs —
// confirm at the use site.
const EXPECTED_MAX_IRQ_FLUSH_ITERATIONS: usize = 100;
384
/// Response for [IrqHandlerRequest].
///
/// Requests documented as "No response is sent" have no corresponding variant here.
#[derive(Serialize, Deserialize, Debug)]
pub enum IrqHandlerResponse {
    /// Sent when the IRQ event tokens have been refreshed.
    IrqEventTokenRefreshComplete,
    /// Specifies the number of tokens serviced in the requested iteration
    /// (less the token for the `WakeAndNotifyIteration` request).
    HandlerIterationComplete(usize),
}
394
/// Source of a `VmMemoryRequest::RegisterMemory` mapping.
#[derive(Serialize, Deserialize)]
pub enum VmMemorySource {
    /// Register shared memory represented by the given descriptor.
    /// On Windows, descriptor MUST be a mapping handle.
    SharedMemory(SharedMemory),
    /// Register a file mapping from the given descriptor.
    Descriptor {
        /// File descriptor to map.
        descriptor: SafeDescriptor,
        /// Offset within the file in bytes.
        offset: u64,
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register Vulkan memory. `VmMemorySource::map` imports and maps it through
    /// `RutabagaGralloc::import_and_map`.
    Vulkan {
        /// OS handle of the memory; ownership is handed to rutabaga when mapped.
        descriptor: SafeDescriptor,
        /// Passed through as `RutabagaMesaHandle::handle_type`.
        handle_type: u32,
        /// Passed through as `VulkanInfo::memory_idx`.
        memory_idx: u32,
        /// Device UUID, used to build the `rutabaga_gfx::DeviceId`.
        device_uuid: [u8; 16],
        /// Driver UUID, used to build the `rutabaga_gfx::DeviceId`.
        driver_uuid: [u8; 16],
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register the current rutabaga external mapping.
    ///
    /// `ptr` and `size` are passed straight into a `base::ExternalMapping`.
    /// NOTE(review): `ptr` is presumably an address valid in the process that executes the
    /// request — confirm with the sender.
    ExternalMapping { ptr: u64, size: u64 },
}
422
423// The following are wrappers to avoid base dependencies in the rutabaga crate
424fn to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor {
425    // SAFETY:
426    // Safe because we own the SafeDescriptor at this point.
427    unsafe { RutabagaDescriptor::from_raw_descriptor(s.into_raw_descriptor()) }
428}
429
/// Adapter that lets a boxed `RutabagaMappedRegion` be used where a `base::MappedRegion` is
/// expected.
struct RutabagaMemoryRegion {
    /// The wrapped rutabaga mapping; pointer and size queries are forwarded to it.
    region: Box<dyn RutabagaMappedRegion>,
}
433
434impl RutabagaMemoryRegion {
435    pub fn new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion {
436        RutabagaMemoryRegion { region }
437    }
438}
439
// SAFETY:
//
// Self guarantees `ptr`..`ptr+size` is an mmaped region owned by this object that
// can't be unmapped during the `MappedRegion`'s lifetime.
//
// Both methods forward to the wrapped `RutabagaMappedRegion`, so the guarantee above rests on
// that object upholding it.
unsafe impl MappedRegion for RutabagaMemoryRegion {
    /// Returns the start address reported by the wrapped region.
    fn as_ptr(&self) -> *mut u8 {
        self.region.as_ptr()
    }

    /// Returns the size reported by the wrapped region.
    fn size(&self) -> usize {
        self.region.size()
    }
}
453
454impl Display for VmMemorySource {
455    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
456        use self::VmMemorySource::*;
457
458        match self {
459            SharedMemory(..) => write!(f, "VmMemorySource::SharedMemory"),
460            Descriptor { .. } => write!(f, "VmMemorySource::Descriptor"),
461            Vulkan { .. } => write!(f, "VmMemorySource::Vulkan"),
462            ExternalMapping { .. } => write!(f, "VmMemorySource::ExternalMapping"),
463        }
464    }
465}
466
467impl VmMemorySource {
468    /// Map the resource and return its mapping and size in bytes.
469    fn map(
470        self,
471        gralloc: &mut RutabagaGralloc,
472        prot: Protection,
473    ) -> anyhow::Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
474        let (mem_region, size, descriptor) = match self {
475            VmMemorySource::Descriptor {
476                descriptor,
477                offset,
478                size,
479            } => (
480                map_descriptor(&descriptor, offset, size, prot)?,
481                size,
482                Some(descriptor),
483            ),
484
485            VmMemorySource::SharedMemory(shm) => {
486                (map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
487            }
488            VmMemorySource::Vulkan {
489                descriptor,
490                handle_type,
491                memory_idx,
492                device_uuid,
493                driver_uuid,
494                size,
495            } => {
496                let device_id = rutabaga_gfx::DeviceId {
497                    device_uuid,
498                    driver_uuid,
499                };
500                let mapped_region = gralloc
501                    .import_and_map(
502                        RutabagaMesaHandle {
503                            os_handle: to_rutabaga_desciptor(descriptor),
504                            handle_type,
505                        },
506                        VulkanInfo {
507                            memory_idx,
508                            device_id,
509                        },
510                        size,
511                    )
512                    .with_context(|| {
513                        format!(
514                            "gralloc failed to import and map, handle type: {handle_type}, memory index {memory_idx}, \
515                             size: {size}"
516                        )
517                    })?;
518                let mapped_region: Box<dyn MappedRegion> =
519                    Box::new(RutabagaMemoryRegion::new(mapped_region));
520                (mapped_region, size, None)
521            }
522            VmMemorySource::ExternalMapping { ptr, size } => {
523                let mapped_region: Box<dyn MappedRegion> = Box::new(ExternalMapping {
524                    ptr,
525                    size: size as usize,
526                });
527                (mapped_region, size, None)
528            }
529        };
530        Ok((mem_region, size, descriptor))
531    }
532}
533
/// Destination of a `VmMemoryRequest::RegisterMemory` mapping in guest address space.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryDestination {
    /// Map at an offset within an existing PCI BAR allocation.
    ///
    /// `allocation` identifies the allocation and `offset` is the byte offset inside it.
    ExistingAllocation { allocation: Alloc, offset: u64 },
    /// Map at the specified guest physical address.
    GuestPhysicalAddress(u64),
}
542
543impl VmMemoryDestination {
544    /// Allocate and return the guest address of a memory mapping destination.
545    pub fn allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress> {
546        let addr = match self {
547            VmMemoryDestination::ExistingAllocation { allocation, offset } => allocator
548                .mmio_allocator_any()
549                .address_from_pci_offset(allocation, offset, size)
550                .map_err(|_e| SysError::new(EINVAL))?,
551            VmMemoryDestination::GuestPhysicalAddress(gpa) => gpa,
552        };
553        Ok(GuestAddress(addr))
554    }
555}
556
/// Request to register or unregister an ioevent.
#[derive(Serialize, Deserialize)]
pub struct IoEventUpdateRequest {
    /// Event tied to the ioevent.
    pub event: Event,
    /// Raw guest memory address the ioevent is attached to (see `VmMemoryRequest::IoEventRaw`).
    pub addr: u64,
    /// Data match condition for the ioevent.
    pub datamatch: Datamatch,
    /// NOTE(review): presumably `true` registers and `false` unregisters — confirm in the
    /// handler.
    pub register: bool,
}
565
/// Request to mmap a file to a shared memory.
/// This request is supposed to follow a `VmMemoryRequest::MmapAndRegisterMemory` request that
/// contains `SharedMemory` that `file` is mmaped to.
///
/// The handler passes these fields to `MemoryMappingArena::add_fd_mapping` with read-only
/// protection.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[derive(Serialize, Deserialize)]
pub struct VmMemoryFileMapping {
    /// File to map; (de)serialized as a descriptor.
    #[serde(with = "with_as_descriptor")]
    pub file: File,
    /// Length of the mapping in bytes.
    pub length: usize,
    /// Offset within the shared memory arena at which the file is mapped.
    pub mem_offset: usize,
    /// Offset within `file` at which the mapping starts.
    pub file_offset: u64,
}
578
/// Requests that change the memory layout of the VM. Each request is carried out by
/// `VmMemoryRequest::execute`.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryRequest {
    /// Prepare a shared memory region to make later operations more efficient. This
    /// may be a no-op depending on underlying platform support.
    PrepareSharedMemoryRegion { alloc: Alloc, cache: MemCacheType },
    /// Register a memory to be mapped to the guest.
    RegisterMemory {
        /// Source of the memory to register (mapped file descriptor, shared memory region, etc.)
        source: VmMemorySource,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Memory protection to map with. `execute` passes `prot == Protection::read()` to
        /// `Vm::add_memory_region`.
        /// NOTE(review): presumably as its read-only flag — confirm against the `Vm` trait.
        prot: Protection,
        /// Cache attribute for guest memory setting
        cache: MemCacheType,
    },
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Call mmap to `shm` and register the memory region as a read-only guest memory.
    /// This request is followed by an array of `VmMemoryFileMapping` with length
    /// `num_file_mappings`
    MmapAndRegisterMemory {
        /// Shared memory that the files are mapped onto and that gets registered.
        shm: SharedMemory,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Length of the array of `VmMemoryFileMapping` that follows.
        num_file_mappings: usize,
    },
    /// Call hypervisor to free the given memory range.
    DynamicallyFreeMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Call hypervisor to reclaim a priorly freed memory range.
    DynamicallyReclaimMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Balloon allocation/deallocation target reached.
    BalloonTargetReached { size: u64 },
    /// Unregister the given memory region that was previously registered with `RegisterMemory`.
    UnregisterMemory(VmMemoryRegionId),
    /// Register an eventfd with raw guest memory address.
    IoEventRaw(IoEventUpdateRequest),
}
618
/// Struct for managing `VmMemoryRequest`s IOMMU related state.
pub struct VmMemoryRequestIommuClient {
    /// Tube used to send requests to the virtio-iommu.
    tube: Arc<Mutex<Tube>>,
    /// Ids of the regions for which a `VfioDmabufMap` request succeeded.
    registered_memory: BTreeSet<VmMemoryRegionId>,
}
624
625impl VmMemoryRequestIommuClient {
626    /// Constructs `VmMemoryRequestIommuClient` from a tube for communication with the viommu.
627    pub fn new(tube: Arc<Mutex<Tube>>) -> Self {
628        Self {
629            tube,
630            registered_memory: BTreeSet::new(),
631        }
632    }
633}
634
/// Bookkeeping entry for a region registered through `VmMemoryRequest::RegisterMemory`.
enum RegisteredMemory {
    /// The memory was mapped into a prepared region (see `try_map_to_prepared_region`).
    FixedMapping {
        /// Slot of the prepared region that contains the mapping.
        slot: MemSlot,
        /// Byte offset of the mapping within the prepared region.
        offset: usize,
        /// Size of the mapping in bytes.
        size: usize,
    },
    /// The memory was registered as a memory region of its own.
    DynamicMapping {
        /// Slot returned by `Vm::add_memory_region`.
        slot: MemSlot,
    },
}
645
/// A shared memory region prepared by `VmMemoryRequest::PrepareSharedMemoryRegion`.
pub struct VmMappedMemoryRegion {
    /// Guest address at which the prepared region starts.
    guest_address: GuestAddress,
    /// Memory slot of the prepared region.
    slot: MemSlot,
}
650
/// State that `VmMemoryRequest::execute` keeps between requests.
#[derive(Default)]
pub struct VmMemoryRegionState {
    /// Prepared regions, keyed by the allocation they were prepared for.
    mapped_regions: HashMap<Alloc, VmMappedMemoryRegion>,
    /// Registered memory, keyed by region id.
    registered_memory: BTreeMap<VmMemoryRegionId, RegisteredMemory>,
}
656
657fn try_map_to_prepared_region(
658    vm: &dyn Vm,
659    region_state: &mut VmMemoryRegionState,
660    source: &VmMemorySource,
661    dest: &VmMemoryDestination,
662    prot: &Protection,
663) -> Option<VmMemoryResponse> {
664    let VmMemoryDestination::ExistingAllocation {
665        allocation,
666        offset: dest_offset,
667    } = dest
668    else {
669        return None;
670    };
671
672    let VmMappedMemoryRegion {
673        guest_address,
674        slot,
675    } = region_state.mapped_regions.get(allocation)?;
676
677    let (descriptor, file_offset, size) = match source {
678        VmMemorySource::Descriptor {
679            descriptor,
680            offset,
681            size,
682        } => (
683            Descriptor(descriptor.as_raw_descriptor()),
684            *offset,
685            *size as usize,
686        ),
687        VmMemorySource::SharedMemory(shm) => {
688            let size = shm.size() as usize;
689            (Descriptor(shm.as_raw_descriptor()), 0, size)
690        }
691        _ => {
692            let error = anyhow::anyhow!(
693                "source {} is not compatible with fixed mapping into prepared memory region",
694                source
695            );
696            return Some(VmMemoryResponse::Err(error.into()));
697        }
698    };
699    if let Err(err) = vm
700        .add_fd_mapping(
701            *slot,
702            *dest_offset as usize,
703            size,
704            &descriptor,
705            file_offset,
706            *prot,
707        )
708        .context("failed to add fd mapping when trying to map to prepared region")
709    {
710        return Some(VmMemoryResponse::Err(err.into()));
711    }
712
713    let guest_address = GuestAddress(guest_address.0 + dest_offset);
714    let region_id = VmMemoryRegionId(guest_address);
715    region_state.registered_memory.insert(
716        region_id,
717        RegisteredMemory::FixedMapping {
718            slot: *slot,
719            offset: *dest_offset as usize,
720            size,
721        },
722    );
723
724    Some(VmMemoryResponse::RegisterMemory {
725        region_id,
726        slot: *slot,
727    })
728}
729
730impl VmMemoryRequest {
731    /// Executes this request on the given Vm.
732    ///
733    /// # Arguments
734    /// * `vm` - The `Vm` to perform the request on.
735    /// * `allocator` - Used to allocate addresses.
736    ///
737    /// This does not return a result, instead encapsulating the success or failure in a
738    /// `VmMemoryResponse` with the intended purpose of sending the response back over the socket
739    /// that received this `VmMemoryResponse`.
740    pub fn execute(
741        self,
742        #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube,
743        vm: &dyn Vm,
744        sys_allocator: &mut SystemAllocator,
745        gralloc: &mut RutabagaGralloc,
746        iommu_client: Option<&mut VmMemoryRequestIommuClient>,
747        region_state: &mut VmMemoryRegionState,
748    ) -> VmMemoryResponse {
749        use self::VmMemoryRequest::*;
750        match self {
751            PrepareSharedMemoryRegion { alloc, cache } => {
752                // Currently the iommu_client is only used by virtio-gpu when used alongside GPU
753                // pci-passthrough.
754                //
755                // TODO(b/323368701): Make compatible with iommu_client by ensuring that
756                // VirtioIOMMUVfioCommand::VfioDmabufMap is submitted for both dynamic mappings and
757                // fixed mappings (i.e. whether or not try_map_to_prepared_region succeeds in
758                // RegisterMemory case below).
759                assert!(iommu_client.is_none());
760
761                if !sys::should_prepare_memory_region() {
762                    return VmMemoryResponse::Ok;
763                }
764
765                match sys::prepare_shared_memory_region(vm, sys_allocator, alloc, cache)
766                    .context("failed to prepare shared memory region")
767                {
768                    Ok(region) => {
769                        region_state.mapped_regions.insert(alloc, region);
770                        VmMemoryResponse::Ok
771                    }
772                    Err(e) => VmMemoryResponse::Err(e.into()),
773                }
774            }
775            RegisterMemory {
776                source,
777                dest,
778                prot,
779                cache,
780            } => {
781                if let Some(resp) =
782                    try_map_to_prepared_region(vm, region_state, &source, &dest, &prot)
783                {
784                    return resp;
785                }
786
787                // Correct on Windows because callers of this IPC guarantee descriptor is a mapping
788                // handle.
789                let (mapped_region, size, descriptor) =
790                    match source.map(gralloc, prot).context("gralloc mapping") {
791                        Ok((region, size, descriptor)) => (region, size, descriptor),
792                        Err(e) => return VmMemoryResponse::Err(e.into()),
793                    };
794
795                let guest_addr = match dest
796                    .allocate(sys_allocator, size)
797                    .context("VM memory destination allocation fails")
798                {
799                    Ok(addr) => addr,
800                    Err(e) => return VmMemoryResponse::Err(e.into()),
801                };
802
803                let slot = match vm
804                    .add_memory_region(
805                        guest_addr,
806                        mapped_region,
807                        prot == Protection::read(),
808                        false,
809                        cache,
810                    )
811                    .context("failed to add memory region when registering memory")
812                {
813                    Ok(slot) => slot,
814                    Err(e) => return VmMemoryResponse::Err(e.into()),
815                };
816
817                let region_id = VmMemoryRegionId(guest_addr);
818                if let (Some(descriptor), Some(iommu_client)) = (descriptor, iommu_client) {
819                    let request =
820                        VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
821                            region_id,
822                            gpa: guest_addr.0,
823                            size,
824                            dma_buf: descriptor,
825                        });
826
827                    match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
828                        Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
829                        resp => {
830                            let error = anyhow::anyhow!(
831                                "Unexpected virtio-iommu message response when registering memory: \
832                                 {:?}", resp);
833                            if let Err(e) = vm.remove_memory_region(slot) {
834                                // There is nothing we can do here, so we just log a warning
835                                // message.
836                                warn!("failed to remove memory region: {:?}", e);
837                            }
838                            return VmMemoryResponse::Err(error.into());
839                        }
840                    };
841
842                    iommu_client.registered_memory.insert(region_id);
843                }
844
845                region_state
846                    .registered_memory
847                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
848                VmMemoryResponse::RegisterMemory { region_id, slot }
849            }
850            #[cfg(any(target_os = "android", target_os = "linux"))]
851            MmapAndRegisterMemory {
852                shm,
853                dest,
854                num_file_mappings,
855            } => {
856                // Define a callback to be executed with extended limit of file counts.
857                // It recieves `num_file_mappings` FDs and call `add_fd_mapping` for each.
858                let callback = || {
859                    let mem = match MemoryMappingBuilder::new(shm.size() as usize)
860                        .from_shared_memory(&shm)
861                        .build()
862                        .context("failed to build MemoryMapping from shared memory")
863                    {
864                        Ok(mem) => mem,
865                        Err(e) => return Err(VmMemoryResponse::Err(e.into())),
866                    };
867                    let mut mmap_arena = MemoryMappingArena::from(mem);
868
869                    // If `num_file_mappings` exceeds `SCM_MAX_FD`, `file_mappings` are sent in
870                    // chunks of length `SCM_MAX_FD`.
871                    let mut file_mappings = Vec::with_capacity(num_file_mappings);
872                    let mut read = 0;
873                    while read < num_file_mappings {
874                        let len = std::cmp::min(num_file_mappings - read, base::unix::SCM_MAX_FD);
875                        let mps: Vec<VmMemoryFileMapping> = match tube
876                            .recv_with_max_fds(len)
877                            .with_context(|| format!("get {num_file_mappings} FDs to be mapped"))
878                        {
879                            Ok(m) => m,
880                            Err(e) => return Err(VmMemoryResponse::Err(e.into())),
881                        };
882                        file_mappings.extend(mps.into_iter());
883                        read += len;
884                    }
885
886                    for VmMemoryFileMapping {
887                        mem_offset,
888                        length,
889                        file,
890                        file_offset,
891                    } in file_mappings
892                    {
893                        if let Err(e) = mmap_arena
894                            .add_fd_mapping(
895                                mem_offset,
896                                length,
897                                &file,
898                                file_offset,
899                                Protection::read(),
900                            )
901                            .context(
902                                "failed to add fd mapping when handling mmap and register memory",
903                            )
904                        {
905                            return Err(VmMemoryResponse::Err(e.into()));
906                        }
907                    }
908                    Ok(mmap_arena)
909                };
910                let mmap_arena = match call_with_extended_max_files(callback)
911                    .context("failed to set max count of file descriptors")
912                {
913                    Ok(Ok(m)) => m,
914                    Ok(Err(e)) => {
915                        return e;
916                    }
917                    Err(e) => {
918                        error!("{e:?}");
919                        return VmMemoryResponse::Err(e.into());
920                    }
921                };
922
923                let size = shm.size();
924                let guest_addr = match dest.allocate(sys_allocator, size).context(
925                    "VM memory destination allocation fails when handling mmap and register memory",
926                ) {
927                    Ok(addr) => addr,
928                    Err(e) => return VmMemoryResponse::Err(e.into()),
929                };
930
931                let slot = match vm
932                    .add_memory_region(
933                        guest_addr,
934                        Box::new(mmap_arena),
935                        true,
936                        false,
937                        MemCacheType::CacheCoherent,
938                    )
939                    .context("failed to add memory region when handling mmap and register memory")
940                {
941                    Ok(slot) => slot,
942                    Err(e) => return VmMemoryResponse::Err(e.into()),
943                };
944
945                let region_id = VmMemoryRegionId(guest_addr);
946
947                region_state
948                    .registered_memory
949                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
950
951                VmMemoryResponse::RegisterMemory { region_id, slot }
952            }
953            UnregisterMemory(id) => match region_state.registered_memory.remove(&id) {
954                Some(RegisteredMemory::DynamicMapping { slot }) => match vm
955                    .remove_memory_region(slot)
956                    .context(
957                        "failed to remove memory region when unregistering dynamic mapping memory",
958                    ) {
959                    Ok(_) => {
960                        if let Some(iommu_client) = iommu_client {
961                            if iommu_client.registered_memory.remove(&id) {
962                                let request = VirtioIOMMURequest::VfioCommand(
963                                    VirtioIOMMUVfioCommand::VfioDmabufUnmap(id),
964                                );
965
966                                match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
967                                    Ok(VirtioIOMMUResponse::VfioResponse(
968                                        VirtioIOMMUVfioResult::Ok,
969                                    )) => VmMemoryResponse::Ok,
970                                    resp => {
971                                        let error = anyhow::anyhow!(
972                                            "Unexpected virtio-iommu message response when \
973                                             unregistering memory: {:?}",
974                                            resp
975                                        );
976                                        VmMemoryResponse::Err(error.into())
977                                    }
978                                }
979                            } else {
980                                VmMemoryResponse::Ok
981                            }
982                        } else {
983                            VmMemoryResponse::Ok
984                        }
985                    }
986                    Err(e) => VmMemoryResponse::Err(e.into()),
987                },
988                Some(RegisteredMemory::FixedMapping { slot, offset, size }) => {
989                    match vm.remove_mapping(slot, offset, size).context(
990                        "failed to remove memory mapping when unregistering fixed mapping memory",
991                    ) {
992                        Ok(()) => VmMemoryResponse::Ok,
993                        Err(e) => VmMemoryResponse::Err(e.into()),
994                    }
995                }
996                None => {
997                    let error =
998                        anyhow::anyhow!("can't find the memory region when unregistering memory");
999                    VmMemoryResponse::Err(error.into())
1000                }
1001            },
1002            DynamicallyFreeMemoryRanges { ranges } => {
1003                let mut r = VmMemoryResponse::Ok;
1004                for (guest_address, size) in ranges {
1005                    match vm
1006                        .handle_balloon_event(BalloonEvent::Inflate(MemRegion {
1007                            guest_address,
1008                            size,
1009                        }))
1010                        .context(
1011                            "failed to handle the inflate balloon event when freeing memory ranges \
1012                             dynamically",
1013                        ) {
1014                        Ok(_) => {}
1015                        Err(e) => {
1016                            error!("{:?}", e);
1017                            r = VmMemoryResponse::Err(e.into());
1018                            break;
1019                        }
1020                    }
1021                }
1022                r
1023            }
1024            DynamicallyReclaimMemoryRanges { ranges } => {
1025                let mut r = VmMemoryResponse::Ok;
1026                for (guest_address, size) in ranges {
1027                    match vm
1028                        .handle_balloon_event(BalloonEvent::Deflate(MemRegion {
1029                            guest_address,
1030                            size,
1031                        }))
1032                        .context(
1033                            "failed to handle the deflate balloon event when reclaiming memory \
1034                             ranges dynamically",
1035                        ) {
1036                        Ok(_) => {}
1037                        Err(e) => {
1038                            error!("{:?}", e);
1039                            r = VmMemoryResponse::Err(e.into());
1040                            break;
1041                        }
1042                    }
1043                }
1044                r
1045            }
1046            BalloonTargetReached { size } => {
1047                match vm
1048                    .handle_balloon_event(BalloonEvent::BalloonTargetReached(size))
1049                    .context("failed to handle the target reached balloon event")
1050                {
1051                    Ok(_) => VmMemoryResponse::Ok,
1052                    Err(e) => VmMemoryResponse::Err(e.into()),
1053                }
1054            }
1055            IoEventRaw(request) => {
1056                let res = if request.register {
1057                    vm.register_ioevent(
1058                        request.event,
1059                        IoEventAddress::Mmio(request.addr),
1060                        request.datamatch,
1061                    )
1062                    .context("failed to register IO event")
1063                } else {
1064                    vm.unregister_ioevent(
1065                        request.event,
1066                        IoEventAddress::Mmio(request.addr),
1067                        request.datamatch,
1068                    )
1069                    .context("failed to unregister IO event")
1070                };
1071                match res {
1072                    Ok(_) => VmMemoryResponse::Ok,
1073                    Err(e) => VmMemoryResponse::Err(e.into()),
1074                }
1075            }
1076        }
1077    }
1078}
1079
#[derive(Serialize, Deserialize, Debug, PartialOrd, PartialEq, Eq, Ord, Clone, Copy)]
/// Identifier for registered memory regions. Globally unique.
// The current implementation uses guest physical address as the unique identifier.
pub struct VmMemoryRegionId(pub GuestAddress);
1084
/// Response to a VM memory request.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmMemoryResponse {
    /// The request to register memory into guest address space was successful.
    RegisterMemory {
        /// Identifier under which the region was recorded; it is the key used to unregister it.
        region_id: VmMemoryRegionId,
        /// Memory slot returned by the VM when the region was added.
        slot: u32,
    },
    /// The request succeeded and there is nothing further to report.
    Ok,
    /// The request failed; the payload carries the error and its context chain.
    Err(VmMemoryResponseError),
}
1095
1096impl<T> From<Result<T>> for VmMemoryResponse {
1097    fn from(r: Result<T>) -> Self {
1098        match r {
1099            Ok(_) => VmMemoryResponse::Ok,
1100            Err(e) => VmMemoryResponse::Err(anyhow::Error::new(e).into()),
1101        }
1102    }
1103}
1104
/// Error carried by `VmMemoryResponse::Err`: a wrapper around an `anyhow::Error`.
///
/// Serialization goes through `FlatVmMemoryResponseError`, a list of the messages in the error
/// chain.
#[derive(Debug, thiserror::Error)]
#[error("Vm memory response error: {0}")]
pub struct VmMemoryResponseError(#[from] pub anyhow::Error);
1108
1109impl TryFrom<FlatVmMemoryResponseError> for VmMemoryResponseError {
1110    type Error = anyhow::Error;
1111    fn try_from(value: FlatVmMemoryResponseError) -> StdResult<Self, Self::Error> {
1112        let inner = value
1113            .0
1114            .into_iter()
1115            .fold(
1116                None,
1117                |error: Option<anyhow::Error>, current_context| match error {
1118                    Some(error) => Some(error.context(current_context)),
1119                    None => Some(anyhow::Error::msg(current_context)),
1120                },
1121            )
1122            .context("should carry at least one error")?;
1123        Ok(Self(inner))
1124    }
1125}
1126
1127impl Serialize for VmMemoryResponseError {
1128    fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error>
1129    where
1130        S: serde::Serializer,
1131    {
1132        let flat: FlatVmMemoryResponseError = self.into();
1133        flat.serialize(serializer)
1134    }
1135}
1136
1137impl<'de> Deserialize<'de> for VmMemoryResponseError {
1138    fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
1139    where
1140        D: serde::Deserializer<'de>,
1141    {
1142        let flat = FlatVmMemoryResponseError::deserialize(deserializer)?;
1143        flat.try_into()
1144            .map_err(|e: anyhow::Error| D::Error::custom(e.to_string()))
1145    }
1146}
1147
/// Serializable form of `VmMemoryResponseError`: one string per entry in the error chain.
///
/// When converted back, the first string becomes the base error and every following string is
/// attached on top of it as context.
#[derive(Debug, Serialize, Deserialize)]
struct FlatVmMemoryResponseError(Vec<String>);
1150
1151impl From<&VmMemoryResponseError> for FlatVmMemoryResponseError {
1152    fn from(value: &VmMemoryResponseError) -> Self {
1153        let contexts = value
1154            .0
1155            .chain()
1156            .map(ToString::to_string)
1157            .rev()
1158            .collect::<Vec<_>>();
1159        Self(contexts)
1160    }
1161}
1162
/// Requests for allocating, routing, and releasing interrupts.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqRequest {
    /// Allocate one gsi, and associate gsi to irqfd with register_irqfd()
    AllocateOneMsi {
        irqfd: Event,
        device_id: DeviceId,
        queue_id: usize,
        device_name: String,
    },
    /// Allocate a specific gsi to irqfd with register_irqfd(). This must only
    /// be used when it is known that the gsi is free. Only the snapshot
    /// subsystem can make this guarantee, and use of this request by any other
    /// caller is strongly discouraged.
    AllocateOneMsiAtGsi {
        irqfd: Event,
        gsi: u32,
        device_id: DeviceId,
        queue_id: usize,
        device_name: String,
    },
    /// Add one msi route entry into the IRQ chip.
    AddMsiRoute {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
        #[cfg(target_arch = "aarch64")]
        pci_address: resources::PciAddress,
    },
    /// Unregister the irqfd and release the gsi.
    ReleaseOneIrq {
        gsi: u32,
        irqfd: Event,
    },
}
1197
/// Data to set up an IRQ event or IRQ route on the IRQ chip.
/// VmIrqRequest::execute can't take an `IrqChip` argument, because of a dependency cycle between
/// devices and vm_control, so it takes a Fn that processes an `IrqSetup`.
pub enum IrqSetup<'a> {
    /// Register an irqfd: `(gsi, irqfd, device_id, queue_id, device_name)`.
    Event(u32, &'a Event, DeviceId, usize, String),
    /// Add the given route.
    Route(IrqRoute),
    /// Unregister an irqfd: `(gsi, irqfd)`.
    UnRegister(u32, &'a Event),
}
1206
1207impl VmIrqRequest {
1208    /// Executes this request on the given Vm.
1209    ///
1210    /// # Arguments
1211    /// * `set_up_irq` - A function that applies an `IrqSetup` to an IRQ chip.
1212    ///
1213    /// This does not return a result, instead encapsulating the success or failure in a
1214    /// `VmIrqResponse` with the intended purpose of sending the response back over the socket
1215    /// that received this `VmIrqResponse`.
1216    pub fn execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse
1217    where
1218        F: FnOnce(IrqSetup) -> Result<()>,
1219    {
1220        use self::VmIrqRequest::*;
1221        match *self {
1222            AllocateOneMsi {
1223                ref irqfd,
1224                device_id,
1225                queue_id,
1226                ref device_name,
1227            } => {
1228                if let Some(irq_num) = sys_allocator.allocate_irq() {
1229                    match set_up_irq(IrqSetup::Event(
1230                        irq_num,
1231                        irqfd,
1232                        device_id,
1233                        queue_id,
1234                        device_name.clone(),
1235                    )) {
1236                        Ok(_) => VmIrqResponse::AllocateOneMsi { gsi: irq_num },
1237                        Err(e) => VmIrqResponse::Err(e),
1238                    }
1239                } else {
1240                    VmIrqResponse::Err(SysError::new(EINVAL))
1241                }
1242            }
1243            AllocateOneMsiAtGsi {
1244                ref irqfd,
1245                gsi,
1246                device_id,
1247                queue_id,
1248                ref device_name,
1249            } => {
1250                match set_up_irq(IrqSetup::Event(
1251                    gsi,
1252                    irqfd,
1253                    device_id,
1254                    queue_id,
1255                    device_name.clone(),
1256                )) {
1257                    Ok(_) => VmIrqResponse::Ok,
1258                    Err(e) => VmIrqResponse::Err(e),
1259                }
1260            }
1261            AddMsiRoute {
1262                gsi,
1263                msi_address,
1264                msi_data,
1265                #[cfg(target_arch = "aarch64")]
1266                pci_address,
1267            } => {
1268                let route = IrqRoute {
1269                    gsi,
1270                    source: IrqSource::Msi {
1271                        address: msi_address,
1272                        data: msi_data,
1273                        #[cfg(target_arch = "aarch64")]
1274                        pci_address,
1275                    },
1276                };
1277                match set_up_irq(IrqSetup::Route(route)) {
1278                    Ok(_) => VmIrqResponse::Ok,
1279                    Err(e) => VmIrqResponse::Err(e),
1280                }
1281            }
1282            ReleaseOneIrq { gsi, ref irqfd } => {
1283                let _ = set_up_irq(IrqSetup::UnRegister(gsi, irqfd));
1284                sys_allocator.release_irq(gsi);
1285                VmIrqResponse::Ok
1286            }
1287        }
1288    }
1289}
1290
/// Response to a `VmIrqRequest`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqResponse {
    /// A gsi was allocated and set up; `gsi` is the allocated number.
    AllocateOneMsi { gsi: u32 },
    /// The request completed successfully.
    Ok,
    /// The request failed with the given error.
    Err(SysError),
}
1297
/// Sleep/wake selector for devices.
// NOTE(review): the consumers of this enum are outside this part of the file; confirm the exact
// meaning of each variant there.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum DevicesState {
    Sleep,
    Wake,
}
1303
/// Outcome of a battery control operation; also used as the error type when parsing battery
/// control arguments from strings.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum BatControlResult {
    /// The battery property was set.
    Ok,
    /// No battery device was created.
    NoBatDevice,
    /// The given health value is not a known `BatHealth` name.
    NoSuchHealth,
    /// The given property is not a known `BatProperty` name.
    NoSuchProperty,
    /// The given status value is not a known `BatStatus` name.
    NoSuchStatus,
    /// The given battery type is not a known `BatteryType` name.
    NoSuchBatType,
    /// The property target could not be parsed as an integer.
    StringParseIntErr,
    /// The property target could not be parsed as a boolean.
    StringParseBoolErr,
}
1315
1316impl Display for BatControlResult {
1317    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1318        use self::BatControlResult::*;
1319
1320        match self {
1321            Ok => write!(f, "Setting battery property successfully"),
1322            NoBatDevice => write!(f, "No battery device created"),
1323            NoSuchHealth => write!(f, "Invalid Battery health setting. Only support: unknown/good/overheat/dead/overvoltage/unexpectedfailure/cold/watchdogtimerexpire/safetytimerexpire/overcurrent"),
1324            NoSuchProperty => write!(f, "Battery doesn't have such property. Only support: status/health/present/capacity/aconline"),
1325            NoSuchStatus => write!(f, "Invalid Battery status setting. Only support: unknown/charging/discharging/notcharging/full"),
1326            NoSuchBatType => write!(f, "Invalid Battery type setting. Only support: goldfish"),
1327            StringParseIntErr => write!(f, "Battery property target ParseInt error"),
1328            StringParseBoolErr => write!(f, "Battery property target ParseBool error"),
1329        }
1330    }
1331}
1332
/// Kind of battery device. `Goldfish` is the default and the only variant.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum BatteryType {
    #[default]
    Goldfish,
}
1339
1340impl FromStr for BatteryType {
1341    type Err = BatControlResult;
1342
1343    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1344        match s {
1345            "goldfish" => Ok(BatteryType::Goldfish),
1346            _ => Err(BatControlResult::NoSuchBatType),
1347        }
1348    }
1349}
1350
/// Name of a battery property or action that a battery control command can target.
///
/// The string form of each variant is defined by the `FromStr` and `Display` impls.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatProperty {
    /// `"status"`
    Status,
    /// `"health"`
    Health,
    /// `"present"`
    Present,
    /// `"capacity"`
    Capacity,
    /// `"aconline"`
    ACOnline,
    /// `"set_fake_bat_config"`
    SetFakeBatConfig,
    /// `"cancel_fake_bat_config"`
    CancelFakeBatConfig,
}
1361
1362impl FromStr for BatProperty {
1363    type Err = BatControlResult;
1364
1365    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1366        match s {
1367            "status" => Ok(BatProperty::Status),
1368            "health" => Ok(BatProperty::Health),
1369            "present" => Ok(BatProperty::Present),
1370            "capacity" => Ok(BatProperty::Capacity),
1371            "aconline" => Ok(BatProperty::ACOnline),
1372            "set_fake_bat_config" => Ok(BatProperty::SetFakeBatConfig),
1373            "cancel_fake_bat_config" => Ok(BatProperty::CancelFakeBatConfig),
1374            _ => Err(BatControlResult::NoSuchProperty),
1375        }
1376    }
1377}
1378
1379impl Display for BatProperty {
1380    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1381        match *self {
1382            BatProperty::Status => write!(f, "status"),
1383            BatProperty::Health => write!(f, "health"),
1384            BatProperty::Present => write!(f, "present"),
1385            BatProperty::Capacity => write!(f, "capacity"),
1386            BatProperty::ACOnline => write!(f, "aconline"),
1387            BatProperty::SetFakeBatConfig => write!(f, "set_fake_bat_config"),
1388            BatProperty::CancelFakeBatConfig => write!(f, "cancel_fake_bat_config"),
1389        }
1390    }
1391}
1392
/// Battery charging status.
///
/// The variants are converted to `u32` by casting, so their declaration order determines the
/// numeric value each one maps to.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatStatus {
    Unknown,
    Charging,
    DisCharging,
    NotCharging,
    Full,
}
1401
1402impl BatStatus {
1403    pub fn new(status: String) -> std::result::Result<Self, BatControlResult> {
1404        match status.as_str() {
1405            "unknown" => Ok(BatStatus::Unknown),
1406            "charging" => Ok(BatStatus::Charging),
1407            "discharging" => Ok(BatStatus::DisCharging),
1408            "notcharging" => Ok(BatStatus::NotCharging),
1409            "full" => Ok(BatStatus::Full),
1410            _ => Err(BatControlResult::NoSuchStatus),
1411        }
1412    }
1413}
1414
1415impl FromStr for BatStatus {
1416    type Err = BatControlResult;
1417
1418    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1419        match s {
1420            "unknown" => Ok(BatStatus::Unknown),
1421            "charging" => Ok(BatStatus::Charging),
1422            "discharging" => Ok(BatStatus::DisCharging),
1423            "notcharging" => Ok(BatStatus::NotCharging),
1424            "full" => Ok(BatStatus::Full),
1425            _ => Err(BatControlResult::NoSuchStatus),
1426        }
1427    }
1428}
1429
1430impl From<BatStatus> for u32 {
1431    fn from(status: BatStatus) -> Self {
1432        status as u32
1433    }
1434}
1435
/// Battery health condition.
///
/// The variants are converted to `u32` by casting, so their declaration order determines the
/// numeric value each one maps to.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatHealth {
    Unknown,
    Good,
    Overheat,
    Dead,
    OverVoltage,
    UnexpectedFailure,
    Cold,
    WatchdogTimerExpire,
    SafetyTimerExpire,
    OverCurrent,
}
1449
1450impl FromStr for BatHealth {
1451    type Err = BatControlResult;
1452
1453    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1454        match s {
1455            "unknown" => Ok(BatHealth::Unknown),
1456            "good" => Ok(BatHealth::Good),
1457            "overheat" => Ok(BatHealth::Overheat),
1458            "dead" => Ok(BatHealth::Dead),
1459            "overvoltage" => Ok(BatHealth::OverVoltage),
1460            "unexpectedfailure" => Ok(BatHealth::UnexpectedFailure),
1461            "cold" => Ok(BatHealth::Cold),
1462            "watchdogtimerexpire" => Ok(BatHealth::WatchdogTimerExpire),
1463            "safetytimerexpire" => Ok(BatHealth::SafetyTimerExpire),
1464            "overcurrent" => Ok(BatHealth::OverCurrent),
1465            _ => Err(BatControlResult::NoSuchHealth),
1466        }
1467    }
1468}
1469
1470impl From<BatHealth> for u32 {
1471    fn from(status: BatHealth) -> Self {
1472        status as u32
1473    }
1474}
1475
/// A parsed battery control command: which property to change and the value to apply.
///
/// Built from a `(property, target)` string pair by `BatControlCommand::new`.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatControlCommand {
    /// Set the battery status.
    SetStatus(BatStatus),
    /// Set the battery health.
    SetHealth(BatHealth),
    /// Set the `present` property to the given number.
    SetPresent(u32),
    /// Set the `capacity` property to the given number.
    SetCapacity(u32),
    /// Set the `aconline` property to the given number.
    SetACOnline(u32),
    /// Apply a fake battery configuration with the given number.
    SetFakeBatConfig(u32),
    /// Cancel the fake battery configuration.
    CancelFakeConfig,
}
1486
1487impl BatControlCommand {
1488    pub fn new(property: String, target: String) -> std::result::Result<Self, BatControlResult> {
1489        let cmd = property.parse::<BatProperty>()?;
1490        match cmd {
1491            BatProperty::Status => Ok(BatControlCommand::SetStatus(target.parse::<BatStatus>()?)),
1492            BatProperty::Health => Ok(BatControlCommand::SetHealth(target.parse::<BatHealth>()?)),
1493            BatProperty::Present => Ok(BatControlCommand::SetPresent(
1494                target
1495                    .parse::<u32>()
1496                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1497            )),
1498            BatProperty::Capacity => Ok(BatControlCommand::SetCapacity(
1499                target
1500                    .parse::<u32>()
1501                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1502            )),
1503            BatProperty::ACOnline => Ok(BatControlCommand::SetACOnline(
1504                target
1505                    .parse::<u32>()
1506                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1507            )),
1508            BatProperty::SetFakeBatConfig => Ok(BatControlCommand::SetFakeBatConfig(
1509                target
1510                    .parse::<u32>()
1511                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1512            )),
1513            BatProperty::CancelFakeBatConfig => Ok(BatControlCommand::CancelFakeConfig),
1514        }
1515    }
1516}
1517
/// Used for VM to control battery properties.
pub struct BatControl {
    /// Kind of battery device this control handle refers to.
    pub type_: BatteryType,
    /// Tube associated with the battery device.
    // NOTE(review): presumably the channel battery control commands are sent over — confirm
    // against the users of this struct.
    pub control_tube: Tube,
}
1523
/// Commands used by the VM to control virtio-snd devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum SndControlCommand {
    /// Mute-all command carrying a boolean flag.
    // NOTE(review): presumably `true` mutes and `false` unmutes — confirm with the snd device.
    MuteAll(bool),
}
1529
/// Used to mark a hotplug PCI device's device type.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum HotPlugDeviceType {
    UpstreamPort,
    DownstreamPort,
    EndPoint,
}
1537
/// Used for VM to hotplug PCI devices.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HotPlugDeviceInfo {
    /// Type of the device being hotplugged.
    pub device_type: HotPlugDeviceType,
    /// Path identifying the device.
    // NOTE(review): presumably a host filesystem path to the device — confirm with the caller.
    pub path: PathBuf,
    // NOTE(review): presumably selects whether a hotplug interrupt is raised — confirm.
    pub hp_interrupt: bool,
}
1545
/// Message for communicating a suspend or resume to the virtio-pvclock device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum PvClockCommand {
    /// Tell the device that a suspend is happening.
    Suspend,
    /// Tell the device that a resume is happening.
    Resume,
}
1552
/// Message used by virtio-pvclock to communicate command results.
#[derive(Serialize, Deserialize, Debug)]
pub enum PvClockCommandResponse {
    /// The command succeeded.
    Ok,
    /// Result variant carrying a total suspended tick count.
    // NOTE(review): presumably the reply to `PvClockCommand::Resume` — confirm with the device.
    Resumed { total_suspended_ticks: u64 },
    // NOTE(review): presumably returned when the device is not active — confirm with the device.
    DeviceInactive,
    /// The command failed with the given error.
    Err(SysError),
}
1561
/// Commands for the vmm-swap feature.
// NOTE(review): the handlers for these commands are outside this part of the file; the exact
// effect of each variant (and of `slow_file_cleanup`) should be confirmed there.
#[derive(Serialize, Deserialize, Debug)]
pub enum SwapCommand {
    Enable,
    Trim,
    SwapOut,
    Disable { slow_file_cleanup: bool },
    Status,
}
1571
/// A request to the main process to perform some operation on the VM.
///
/// Unless otherwise noted, each request should expect a `VmResponse::Ok` to be received on success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmRequest {
    /// Break the VM's run loop and exit.
    Exit,
    /// Trigger a power button event in the guest.
    Powerbtn,
    /// Trigger a sleep button event in the guest.
    Sleepbtn,
    /// Trigger an RTC interrupt in the guest. When the irq associated with the RTC is
    /// resampled, it will be re-asserted as long as `clear_evt` is not signaled.
    Rtc { clear_evt: Event },
    /// Suspend the VM's VCPUs until resume.
    SuspendVcpus,
    /// Swap the memory content into files on a disk.
    Swap(SwapCommand),
    /// Resume the VM's VCPUs that were previously suspended.
    ResumeVcpus,
    /// Inject a general-purpose event. If `clear_evt` is provided, when the irq associated
    /// with the GPE is resampled, it will be re-asserted as long as `clear_evt` is not
    /// signaled.
    Gpe { gpe: u32, clear_evt: Option<Event> },
    /// Inject a PCI PME.
    PciPme(u16),
    /// Make the VM's RT VCPU real-time.
    MakeRT,
    /// Command for balloon driver.
    #[cfg(feature = "balloon")]
    BalloonCommand(BalloonControlCommand),
    /// Send a command to a disk chosen by `disk_index`.
    /// `disk_index` is a 0-based count of `--disk`, `--rwdisk`, and `-r` command-line options.
    DiskCommand {
        disk_index: usize,
        command: DiskControlCommand,
    },
    /// Command for the USB controller.
    UsbCommand(UsbControlCommand),
    /// Command to modify the gpu.
    GpuCommand(GpuControlCommand),
    /// Command to set battery.
    BatCommand(BatteryType, BatControlCommand),
    /// Command to control snd devices.
    #[cfg(feature = "audio")]
    SndCommand(SndControlCommand),
    /// Command to add/remove multiple vfio-pci devices.
    HotPlugVfioCommand {
        device: HotPlugDeviceInfo,
        add: bool,
    },
    /// Command to add/remove network tap device as virtio-pci device.
    #[cfg(feature = "pci-hotplug")]
    HotPlugNetCommand(NetControlCommand),
    /// Command to snapshot devices.
    Snapshot(SnapshotCommand),
    /// Register for event notification.
    RegisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for notifications for event.
    UnregisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for all event notification.
    Unregister { socket_addr: String },
    /// Suspend VM VCPUs and Devices until resume.
    SuspendVm,
    /// Resume VM VCPUs and Devices.
    ResumeVm,
    /// Returns Vcpus PID/TID.
    VcpuPidTid,
    /// Throttles the requested vCPU for microseconds.
    // NOTE(review): presumably the fields are `(vcpu index, microseconds)` — confirm with the
    // handler.
    Throttle(usize, u32),
    /// Returns unique descriptor of this VM.
    GetVmDescriptor,
    /// Registers memory in guest.
    RegisterMemory {
        fd: SafeDescriptor,
        offset: u64,
        range_start: u64,
        range_end: u64,
        cache_coherent: bool,
    },
    /// Unregisters memory in guest.
    UnregisterMemory { region_id: u64 },
}
1662
/// Kinds of events a listener can register for.
///
/// NOTE: when making any changes to this enum please also update
/// RegisteredEventFfi in crosvm_control/src/lib.rs
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum RegisteredEvent {
    VirtioBalloonWsReport,
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1671
/// A registered event together with the data it carries.
///
/// Each variant corresponds to the `RegisteredEvent` variant of the same name.
#[derive(Serialize, Deserialize, Debug)]
pub enum RegisteredEventWithData {
    /// Balloon working-set report: the reported buckets plus the `balloon_actual` value.
    VirtioBalloonWsReport {
        ws_buckets: Vec<balloon_control::WSBucket>,
        balloon_actual: u64,
    },
    /// Balloon resize event; carries no data.
    VirtioBalloonResize,
    /// Balloon OOM deflation event; carries no data.
    VirtioBalloonOOMDeflation,
}
1681
1682impl RegisteredEventWithData {
1683    pub fn into_event(&self) -> RegisteredEvent {
1684        match self {
1685            Self::VirtioBalloonWsReport { .. } => RegisteredEvent::VirtioBalloonWsReport,
1686            Self::VirtioBalloonResize => RegisteredEvent::VirtioBalloonResize,
1687            Self::VirtioBalloonOOMDeflation => RegisteredEvent::VirtioBalloonOOMDeflation,
1688        }
1689    }
1690
1691    #[cfg(feature = "registered_events")]
1692    pub fn into_proto(&self) -> registered_events::RegisteredEvent {
1693        match self {
1694            Self::VirtioBalloonWsReport {
1695                ws_buckets,
1696                balloon_actual,
1697            } => {
1698                let mut report = registered_events::VirtioBalloonWsReport {
1699                    balloon_actual: *balloon_actual,
1700                    ..registered_events::VirtioBalloonWsReport::new()
1701                };
1702                for ws in ws_buckets {
1703                    report.ws_buckets.push(registered_events::VirtioWsBucket {
1704                        age: ws.age,
1705                        file_bytes: ws.bytes[0],
1706                        anon_bytes: ws.bytes[1],
1707                        ..registered_events::VirtioWsBucket::new()
1708                    });
1709                }
1710                let mut event = registered_events::RegisteredEvent::new();
1711                event.set_ws_report(report);
1712                event
1713            }
1714            Self::VirtioBalloonResize => {
1715                let mut event = registered_events::RegisteredEvent::new();
1716                event.set_resize(registered_events::VirtioBalloonResize::new());
1717                event
1718            }
1719            Self::VirtioBalloonOOMDeflation => {
1720                let mut event = registered_events::RegisteredEvent::new();
1721                event.set_oom_deflation(registered_events::VirtioBalloonOOMDeflation::new());
1722                event
1723            }
1724        }
1725    }
1726
1727    pub fn from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self {
1728        RegisteredEventWithData::VirtioBalloonWsReport {
1729            ws_buckets: ws.ws.clone(),
1730            balloon_actual,
1731        }
1732    }
1733}
1734
1735pub fn handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse {
1736    // Forward the request to the block device process via its control socket.
1737    if let Err(e) = disk_host_tube.send(command) {
1738        error!("disk socket send failed: {}", e);
1739        return VmResponse::Err(SysError::new(EINVAL));
1740    }
1741
1742    // Wait for the disk control command to be processed
1743    match disk_host_tube.recv() {
1744        Ok(DiskControlResult::Ok) => VmResponse::Ok,
1745        Ok(DiskControlResult::Err(e)) => VmResponse::Err(e),
1746        Err(e) => {
1747            error!("disk socket recv failed: {}", e);
1748            VmResponse::Err(SysError::new(EINVAL))
1749        }
1750    }
1751}
1752
1753/// WARNING: descriptor must be a mapping handle on Windows.
1754fn map_descriptor(
1755    descriptor: &dyn AsRawDescriptor,
1756    offset: u64,
1757    size: u64,
1758    prot: Protection,
1759) -> Result<Box<dyn MappedRegion>> {
1760    let size: usize = size.try_into().map_err(|_e| SysError::new(ERANGE))?;
1761    match MemoryMappingBuilder::new(size)
1762        .from_descriptor(descriptor)
1763        .offset(offset)
1764        .protection(prot)
1765        .build()
1766    {
1767        Ok(mmap) => Ok(Box::new(mmap)),
1768        Err(MmapError::SystemCallFailed(e)) => Err(e),
1769        _ => Err(SysError::new(EINVAL)),
1770    }
1771}
1772
1773// Get vCPU state. vCPUs are expected to all hold the same state.
1774// In this function, there may be a time where vCPUs are not holding the same state
1775// as they transition from one state to the other. This is expected, and the final result
1776// should be all vCPUs holding the same state.
1777fn get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode> {
1778    let (send_chan, recv_chan) = mpsc::channel();
1779    kick_vcpus(VcpuControl::GetStates(send_chan));
1780    if vcpu_num == 0 {
1781        bail!("vcpu_num is zero");
1782    }
1783    let mut current_mode_vec: Vec<VmRunMode> = Vec::new();
1784    for _ in 0..vcpu_num {
1785        match recv_chan.recv() {
1786            Ok(state) => current_mode_vec.push(state),
1787            Err(e) => {
1788                bail!("Failed to get vCPU state: {}", e);
1789            }
1790        };
1791    }
1792    let first_state = current_mode_vec[0];
1793    if first_state == VmRunMode::Exiting {
1794        panic!("Attempt to snapshot while exiting.");
1795    }
1796    if current_mode_vec.iter().any(|x| *x != first_state) {
1797        // We do not panic here. It could be that vCPUs are transitioning from one mode to another.
1798        bail!("Unknown VM state: vCPUs hold different states.");
1799    }
1800    Ok(first_state)
1801}
1802
/// A guard to guarantee that all the vCPUs are suspended during the scope.
///
/// When this guard is dropped, it rolls back the state of CPUs.
pub struct VcpuSuspendGuard<'a> {
    /// Run mode the vCPUs reported when the guard was created. On drop it is sent back to the
    /// vCPUs unless it is `VmRunMode::Suspending`.
    saved_run_mode: VmRunMode,
    /// Callback used to send a `VcpuControl` message to the vCPUs.
    kick_vcpus: &'a dyn Fn(VcpuControl),
}
1810
1811impl<'a> VcpuSuspendGuard<'a> {
1812    /// Check the all vCPU state and suspend the vCPUs if they are running.
1813    ///
1814    /// This returns [VcpuSuspendGuard] to rollback the vcpu state.
1815    ///
1816    /// # Arguments
1817    ///
1818    /// * `kick_vcpus` - A funtion to send [VcpuControl] message to all the vCPUs and interrupt
1819    ///   them.
1820    /// * `vcpu_num` - The number of vCPUs.
1821    pub fn new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self> {
1822        // get initial vcpu state
1823        let saved_run_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1824        match saved_run_mode {
1825            VmRunMode::Running => {
1826                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1827                // Blocking call, waiting for response to ensure vCPU state was updated.
1828                // In case of failure, where a vCPU still has the state running, start up vcpus and
1829                // abort operation.
1830                let current_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1831                if current_mode != VmRunMode::Suspending {
1832                    kick_vcpus(VcpuControl::RunState(saved_run_mode));
1833                    bail!("vCPUs failed to all suspend. Kicking back all vCPUs to their previous state: {saved_run_mode}");
1834                }
1835            }
1836            VmRunMode::Suspending => {
1837                // do nothing. keep the state suspending.
1838            }
1839            other => {
1840                bail!("vcpus are not in running/suspending state, but {}", other);
1841            }
1842        };
1843        Ok(Self {
1844            saved_run_mode,
1845            kick_vcpus,
1846        })
1847    }
1848}
1849
1850impl Drop for VcpuSuspendGuard<'_> {
1851    fn drop(&mut self) {
1852        if self.saved_run_mode != VmRunMode::Suspending {
1853            (self.kick_vcpus)(VcpuControl::RunState(self.saved_run_mode));
1854        }
1855    }
1856}
1857
/// A guard to guarantee that all devices are sleeping during its scope.
///
/// When this guard is dropped, it wakes the devices.
pub struct DeviceSleepGuard<'a> {
    /// Tube over which `DeviceControlCommand`s are sent and `VmResponse`s are received.
    device_control_tube: &'a Tube,
    /// Devices state reported when the guard was created. Devices are only woken on drop if this
    /// is `DevicesState::Wake`.
    devices_state: DevicesState,
}
1865
1866impl<'a> DeviceSleepGuard<'a> {
1867    fn new(device_control_tube: &'a Tube) -> anyhow::Result<Self> {
1868        device_control_tube
1869            .send(&DeviceControlCommand::GetDevicesState)
1870            .context("send command to devices control socket")?;
1871        let devices_state = match device_control_tube
1872            .recv()
1873            .context("receive from devices control socket")?
1874        {
1875            VmResponse::DevicesState(state) => state,
1876            resp => bail!("failed to get devices state. Unexpected behavior: {}", resp),
1877        };
1878        if let DevicesState::Wake = devices_state {
1879            device_control_tube
1880                .send(&DeviceControlCommand::SleepDevices)
1881                .context("send command to devices control socket")?;
1882            match device_control_tube
1883                .recv()
1884                .context("receive from devices control socket")?
1885            {
1886                VmResponse::Ok => (),
1887                resp => bail!("device sleep failed: {}", resp),
1888            }
1889        }
1890        Ok(Self {
1891            device_control_tube,
1892            devices_state,
1893        })
1894    }
1895}
1896
1897impl Drop for DeviceSleepGuard<'_> {
1898    fn drop(&mut self) {
1899        if let DevicesState::Wake = self.devices_state {
1900            if let Err(e) = self
1901                .device_control_tube
1902                .send(&DeviceControlCommand::WakeDevices)
1903            {
1904                panic!("failed to request device wake after snapshot: {e}");
1905            }
1906            match self.device_control_tube.recv() {
1907                Ok(VmResponse::Ok) => (),
1908                Ok(resp) => panic!("unexpected response to device wake request: {resp}"),
1909                Err(e) => panic!("failed to get reply for device wake request: {e}"),
1910            }
1911        }
1912    }
1913}
1914
1915impl VmRequest {
1916    /// Executes this request on the given Vm and other mutable state.
1917    ///
1918    /// This does not return a result, instead encapsulating the success or failure in a
1919    /// `VmResponse` with the intended purpose of sending the response back over the  socket that
1920    /// received this `VmRequest`.
1921    ///
1922    /// `suspended_pvclock_state`: If the hypervisor has its own pvclock (not the same as
1923    /// virtio-pvclock) and the VM is suspended (not just the vCPUs, but the full VM), then
1924    /// `suspended_pvclock_state` will be used to store the ClockState saved just after the vCPUs
1925    /// were suspended. It is important that we save the value right after the vCPUs are suspended
1926    /// and restore it right before the vCPUs are resumed (instead of, more naturally, during the
1927    /// snapshot/restore steps) because the pvclock continues to tick even when the vCPUs are
1928    /// suspended.
1929    #[allow(unused_variables)]
1930    pub fn execute(
1931        &self,
1932        vm: &dyn Vm,
1933        disk_host_tubes: &[Tube],
1934        snd_host_tubes: &[Tube],
1935        pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
1936        gpu_control_tube: Option<&Tube>,
1937        usb_control_tube: Option<&Tube>,
1938        bat_control: &mut Option<BatControl>,
1939        kick_vcpus: impl Fn(VcpuControl),
1940        #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl),
1941        force_s2idle: bool,
1942        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
1943        device_control_tube: &Tube,
1944        vcpu_size: usize,
1945        irq_handler_control: &Tube,
1946        snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
1947        suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
1948    ) -> VmResponse {
1949        match self {
1950            VmRequest::Exit => {
1951                panic!("VmRequest::Exit should be handled by the platform run loop");
1952            }
1953            VmRequest::Powerbtn => {
1954                if let Some(pm) = pm {
1955                    pm.lock().pwrbtn_evt();
1956                    VmResponse::Ok
1957                } else {
1958                    error!("{:#?} not supported", *self);
1959                    VmResponse::Err(SysError::new(ENOTSUP))
1960                }
1961            }
1962            VmRequest::Sleepbtn => {
1963                if let Some(pm) = pm {
1964                    pm.lock().slpbtn_evt();
1965                    VmResponse::Ok
1966                } else {
1967                    error!("{:#?} not supported", *self);
1968                    VmResponse::Err(SysError::new(ENOTSUP))
1969                }
1970            }
1971            VmRequest::Rtc { clear_evt } => {
1972                if let Some(pm) = pm.as_ref() {
1973                    match clear_evt.try_clone() {
1974                        Ok(clear_evt) => {
1975                            // RTC event will asynchronously trigger wakeup.
1976                            pm.lock().rtc_evt(clear_evt);
1977                            VmResponse::Ok
1978                        }
1979                        Err(err) => {
1980                            error!("Error cloning clear_evt: {:?}", err);
1981                            VmResponse::Err(SysError::new(EIO))
1982                        }
1983                    }
1984                } else {
1985                    error!("{:#?} not supported", *self);
1986                    VmResponse::Err(SysError::new(ENOTSUP))
1987                }
1988            }
1989            VmRequest::SuspendVcpus => {
1990                if !force_s2idle {
1991                    kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1992                    let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
1993                        Ok(state) => state,
1994                        Err(e) => {
1995                            error!("failed to get vcpu state: {e}");
1996                            return VmResponse::Err(SysError::new(EIO));
1997                        }
1998                    };
1999                    if current_mode != VmRunMode::Suspending {
2000                        error!("vCPUs failed to all suspend.");
2001                        return VmResponse::Err(SysError::new(EIO));
2002                    }
2003                }
2004                VmResponse::Ok
2005            }
2006            VmRequest::ResumeVcpus => {
2007                if let Err(e) = device_control_tube.send(&DeviceControlCommand::GetDevicesState) {
2008                    error!("failed to send GetDevicesState: {}", e);
2009                    return VmResponse::Err(SysError::new(EIO));
2010                }
2011                let devices_state = match device_control_tube.recv() {
2012                    Ok(VmResponse::DevicesState(state)) => state,
2013                    Ok(resp) => {
2014                        error!("failed to get devices state. Unexpected behavior: {}", resp);
2015                        return VmResponse::Err(SysError::new(EINVAL));
2016                    }
2017                    Err(e) => {
2018                        error!("failed to get devices state. Unexpected behavior: {}", e);
2019                        return VmResponse::Err(SysError::new(EINVAL));
2020                    }
2021                };
2022                if let DevicesState::Sleep = devices_state {
2023                    error!("Trying to wake Vcpus while Devices are asleep. Did you mean to use `crosvm resume --full`?");
2024                    return VmResponse::Err(SysError::new(EINVAL));
2025                }
2026
2027                if force_s2idle {
2028                    // During resume also emulate powerbtn event which will allow to wakeup fully
2029                    // suspended guest.
2030                    if let Some(pm) = pm {
2031                        pm.lock().pwrbtn_evt();
2032                    } else {
2033                        error!("triggering power btn during resume not supported");
2034                        return VmResponse::Err(SysError::new(ENOTSUP));
2035                    }
2036                }
2037
2038                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2039                VmResponse::Ok
2040            }
2041            VmRequest::Swap(SwapCommand::Enable) => {
2042                #[cfg(feature = "swap")]
2043                if let Some(swap_controller) = swap_controller {
2044                    // Suspend all vcpus and devices while vmm-swap is enabling (move the guest
2045                    // memory contents to the staging memory) to guarantee no processes other than
2046                    // the swap monitor process access the guest memory.
2047                    let _vcpu_guard = match VcpuSuspendGuard::new(&kick_vcpus, vcpu_size) {
2048                        Ok(guard) => guard,
2049                        Err(e) => {
2050                            error!("failed to suspend vcpus: {:?}", e);
2051                            return VmResponse::Err(SysError::new(EINVAL));
2052                        }
2053                    };
2054                    // TODO(b/253386409): Use `devices::Suspendable::sleep()` instead of sending
2055                    // `SIGSTOP` signal.
2056                    let _devices_guard = match swap_controller.suspend_devices() {
2057                        Ok(guard) => guard,
2058                        Err(e) => {
2059                            error!("failed to suspend devices: {:?}", e);
2060                            return VmResponse::Err(SysError::new(EINVAL));
2061                        }
2062                    };
2063
2064                    return match swap_controller.enable() {
2065                        Ok(()) => VmResponse::Ok,
2066                        Err(e) => {
2067                            error!("swap enable failed: {}", e);
2068                            VmResponse::Err(SysError::new(EINVAL))
2069                        }
2070                    };
2071                }
2072                VmResponse::Err(SysError::new(ENOTSUP))
2073            }
2074            VmRequest::Swap(SwapCommand::Trim) => {
2075                #[cfg(feature = "swap")]
2076                if let Some(swap_controller) = swap_controller {
2077                    return match swap_controller.trim() {
2078                        Ok(()) => VmResponse::Ok,
2079                        Err(e) => {
2080                            error!("swap trim failed: {}", e);
2081                            VmResponse::Err(SysError::new(EINVAL))
2082                        }
2083                    };
2084                }
2085                VmResponse::Err(SysError::new(ENOTSUP))
2086            }
2087            VmRequest::Swap(SwapCommand::SwapOut) => {
2088                #[cfg(feature = "swap")]
2089                if let Some(swap_controller) = swap_controller {
2090                    return match swap_controller.swap_out() {
2091                        Ok(()) => VmResponse::Ok,
2092                        Err(e) => {
2093                            error!("swap out failed: {}", e);
2094                            VmResponse::Err(SysError::new(EINVAL))
2095                        }
2096                    };
2097                }
2098                VmResponse::Err(SysError::new(ENOTSUP))
2099            }
2100            VmRequest::Swap(SwapCommand::Disable {
2101                #[cfg(feature = "swap")]
2102                slow_file_cleanup,
2103                ..
2104            }) => {
2105                #[cfg(feature = "swap")]
2106                if let Some(swap_controller) = swap_controller {
2107                    return match swap_controller.disable(*slow_file_cleanup) {
2108                        Ok(()) => VmResponse::Ok,
2109                        Err(e) => {
2110                            error!("swap disable failed: {}", e);
2111                            VmResponse::Err(SysError::new(EINVAL))
2112                        }
2113                    };
2114                }
2115                VmResponse::Err(SysError::new(ENOTSUP))
2116            }
2117            VmRequest::Swap(SwapCommand::Status) => {
2118                #[cfg(feature = "swap")]
2119                if let Some(swap_controller) = swap_controller {
2120                    return match swap_controller.status() {
2121                        Ok(status) => VmResponse::SwapStatus(status),
2122                        Err(e) => {
2123                            error!("swap status failed: {}", e);
2124                            VmResponse::Err(SysError::new(EINVAL))
2125                        }
2126                    };
2127                }
2128                VmResponse::Err(SysError::new(ENOTSUP))
2129            }
2130            VmRequest::SuspendVm => {
2131                info!("Starting crosvm suspend");
2132                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
2133                let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
2134                    Ok(state) => state,
2135                    Err(e) => {
2136                        error!("failed to get vcpu state: {e}");
2137                        return VmResponse::Err(SysError::new(EIO));
2138                    }
2139                };
2140                if current_mode != VmRunMode::Suspending {
2141                    error!("vCPUs failed to all suspend.");
2142                    return VmResponse::Err(SysError::new(EIO));
2143                }
2144                // Snapshot the pvclock ASAP after stopping vCPUs.
2145                if vm.check_capability(VmCap::PvClock) {
2146                    if suspended_pvclock_state.is_none() {
2147                        *suspended_pvclock_state = Some(match vm.get_pvclock() {
2148                            Ok(x) => x,
2149                            Err(e) => {
2150                                error!("suspend_pvclock failed: {e:?}");
2151                                return VmResponse::Err(SysError::new(EIO));
2152                            }
2153                        });
2154                    }
2155                }
2156                if let Err(e) = device_control_tube
2157                    .send(&DeviceControlCommand::SleepDevices)
2158                    .context("send command to devices control socket")
2159                {
2160                    error!("{:?}", e);
2161                    return VmResponse::Err(SysError::new(EIO));
2162                };
2163                match device_control_tube
2164                    .recv()
2165                    .context("receive from devices control socket")
2166                {
2167                    Ok(VmResponse::Ok) => {
2168                        info!("Finished crosvm suspend successfully");
2169                        VmResponse::Ok
2170                    }
2171                    Ok(resp) => {
2172                        error!("device sleep failed: {}", resp);
2173                        VmResponse::Err(SysError::new(EIO))
2174                    }
2175                    Err(e) => {
2176                        error!("receive from devices control socket: {:?}", e);
2177                        VmResponse::Err(SysError::new(EIO))
2178                    }
2179                }
2180            }
2181            VmRequest::ResumeVm => {
2182                info!("Starting crosvm resume");
2183                if let Err(e) = device_control_tube
2184                    .send(&DeviceControlCommand::WakeDevices)
2185                    .context("send command to devices control socket")
2186                {
2187                    error!("{:?}", e);
2188                    return VmResponse::Err(SysError::new(EIO));
2189                };
2190                match device_control_tube
2191                    .recv()
2192                    .context("receive from devices control socket")
2193                {
2194                    Ok(VmResponse::Ok) => {
2195                        info!("Finished crosvm resume successfully");
2196                    }
2197                    Ok(resp) => {
2198                        error!("device wake failed: {}", resp);
2199                        return VmResponse::Err(SysError::new(EIO));
2200                    }
2201                    Err(e) => {
2202                        error!("receive from devices control socket: {:?}", e);
2203                        return VmResponse::Err(SysError::new(EIO));
2204                    }
2205                }
2206                // Resume the pvclock as late as possible before starting vCPUs.
2207                if vm.check_capability(VmCap::PvClock) {
2208                    // If None, then we aren't suspended, which is a valid case.
2209                    if let Some(x) = suspended_pvclock_state {
2210                        if let Err(e) = vm.set_pvclock(x) {
2211                            error!("resume_pvclock failed: {e:?}");
2212                            return VmResponse::Err(SysError::new(EIO));
2213                        }
2214                    }
2215                }
2216                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2217                VmResponse::Ok
2218            }
2219            VmRequest::Gpe { gpe, clear_evt } => {
2220                if let Some(pm) = pm.as_ref() {
2221                    match clear_evt.as_ref().map(|e| e.try_clone()).transpose() {
2222                        Ok(clear_evt) => {
2223                            pm.lock().gpe_evt(*gpe, clear_evt);
2224                            VmResponse::Ok
2225                        }
2226                        Err(err) => {
2227                            error!("Error cloning clear_evt: {:?}", err);
2228                            VmResponse::Err(SysError::new(EIO))
2229                        }
2230                    }
2231                } else {
2232                    error!("{:#?} not supported", *self);
2233                    VmResponse::Err(SysError::new(ENOTSUP))
2234                }
2235            }
2236            VmRequest::PciPme(requester_id) => {
2237                if let Some(pm) = pm.as_ref() {
2238                    pm.lock().pme_evt(*requester_id);
2239                    VmResponse::Ok
2240                } else {
2241                    error!("{:#?} not supported", *self);
2242                    VmResponse::Err(SysError::new(ENOTSUP))
2243                }
2244            }
2245            VmRequest::MakeRT => {
2246                kick_vcpus(VcpuControl::MakeRT);
2247                VmResponse::Ok
2248            }
2249            #[cfg(feature = "balloon")]
2250            VmRequest::BalloonCommand(_) => unreachable!("Should be handled with BalloonTube"),
2251            VmRequest::DiskCommand {
2252                disk_index,
2253                ref command,
2254            } => match &disk_host_tubes.get(*disk_index) {
2255                Some(tube) => handle_disk_command(command, tube),
2256                None => VmResponse::Err(SysError::new(ENODEV)),
2257            },
2258            VmRequest::GpuCommand(ref cmd) => match gpu_control_tube {
2259                Some(gpu_control) => {
2260                    let res = gpu_control.send(cmd);
2261                    if let Err(e) = res {
2262                        error!("fail to send command to gpu control socket: {}", e);
2263                        return VmResponse::Err(SysError::new(EIO));
2264                    }
2265                    match gpu_control.recv() {
2266                        Ok(response) => VmResponse::GpuResponse(response),
2267                        Err(e) => {
2268                            error!("fail to recv command from gpu control socket: {}", e);
2269                            VmResponse::Err(SysError::new(EIO))
2270                        }
2271                    }
2272                }
2273                None => {
2274                    error!("gpu control is not enabled in crosvm");
2275                    VmResponse::Err(SysError::new(EIO))
2276                }
2277            },
2278            VmRequest::UsbCommand(ref cmd) => {
2279                let usb_control_tube = match usb_control_tube {
2280                    Some(t) => t,
2281                    None => {
2282                        error!("attempted to execute USB request without control tube");
2283                        return VmResponse::Err(SysError::new(ENODEV));
2284                    }
2285                };
2286                let res = usb_control_tube.send(cmd);
2287                if let Err(e) = res {
2288                    error!("fail to send command to usb control socket: {}", e);
2289                    return VmResponse::Err(SysError::new(EIO));
2290                }
2291                match usb_control_tube.recv() {
2292                    Ok(response) => VmResponse::UsbResponse(response),
2293                    Err(e) => {
2294                        error!("fail to recv command from usb control socket: {}", e);
2295                        VmResponse::Err(SysError::new(EIO))
2296                    }
2297                }
2298            }
2299            VmRequest::BatCommand(type_, ref cmd) => {
2300                match bat_control {
2301                    Some(battery) => {
2302                        if battery.type_ != *type_ {
2303                            error!("ignored battery command due to battery type: expected {:?}, got {:?}", battery.type_, type_);
2304                            return VmResponse::Err(SysError::new(EINVAL));
2305                        }
2306
2307                        let res = battery.control_tube.send(cmd);
2308                        if let Err(e) = res {
2309                            error!("fail to send command to bat control socket: {}", e);
2310                            return VmResponse::Err(SysError::new(EIO));
2311                        }
2312
2313                        match battery.control_tube.recv() {
2314                            Ok(response) => VmResponse::BatResponse(response),
2315                            Err(e) => {
2316                                error!("fail to recv command from bat control socket: {}", e);
2317                                VmResponse::Err(SysError::new(EIO))
2318                            }
2319                        }
2320                    }
2321                    None => VmResponse::BatResponse(BatControlResult::NoBatDevice),
2322                }
2323            }
2324            #[cfg(feature = "audio")]
2325            VmRequest::SndCommand(ref cmd) => match cmd {
2326                SndControlCommand::MuteAll(muted) => {
2327                    for tube in snd_host_tubes {
2328                        let res = tube.send(&SndControlCommand::MuteAll(*muted));
2329                        if let Err(e) = res {
2330                            error!("fail to send command to snd control socket: {}", e);
2331                            return VmResponse::Err(SysError::new(EIO));
2332                        }
2333
2334                        match tube.recv() {
2335                            Ok(VmResponse::Ok) => {
2336                                debug!("device is successfully muted");
2337                            }
2338                            Ok(resp) => {
2339                                error!("mute failed: {}", resp);
2340                                return VmResponse::ErrString("fail to mute the device".to_owned());
2341                            }
2342                            Err(e) => return VmResponse::Err(SysError::new(EIO)),
2343                        }
2344                    }
2345                    VmResponse::Ok
2346                }
2347            },
2348            VmRequest::HotPlugVfioCommand { device: _, add: _ } => VmResponse::Ok,
2349            #[cfg(feature = "pci-hotplug")]
2350            VmRequest::HotPlugNetCommand(ref _net_cmd) => {
2351                VmResponse::ErrString("hot plug not supported".to_owned())
2352            }
2353            VmRequest::Snapshot(SnapshotCommand::Take {
2354                ref snapshot_path,
2355                compress_memory,
2356                encrypt,
2357            }) => {
2358                info!("Starting crosvm snapshot");
2359                match do_snapshot(
2360                    snapshot_path.to_path_buf(),
2361                    kick_vcpus,
2362                    irq_handler_control,
2363                    device_control_tube,
2364                    vcpu_size,
2365                    snapshot_irqchip,
2366                    *compress_memory,
2367                    *encrypt,
2368                    suspended_pvclock_state,
2369                    vm,
2370                ) {
2371                    Ok(()) => {
2372                        info!("Finished crosvm snapshot successfully");
2373                        VmResponse::Ok
2374                    }
2375                    Err(e) => {
2376                        error!("failed to handle snapshot: {:?}", e);
2377                        VmResponse::Err(SysError::new(EIO))
2378                    }
2379                }
2380            }
2381            VmRequest::RegisterListener {
2382                socket_addr: _,
2383                event: _,
2384            } => VmResponse::Ok,
2385            VmRequest::UnregisterListener {
2386                socket_addr: _,
2387                event: _,
2388            } => VmResponse::Ok,
2389            VmRequest::Unregister { socket_addr: _ } => VmResponse::Ok,
2390            VmRequest::VcpuPidTid => unreachable!(),
2391            VmRequest::Throttle(_, _) => unreachable!(),
2392            VmRequest::GetVmDescriptor => {
2393                let vm_fd = match vm.try_clone_descriptor() {
2394                    Ok(vm_fd) => vm_fd,
2395                    Err(e) => {
2396                        error!("failed to get vm_fd: {:?}", e);
2397                        return VmResponse::Err(e);
2398                    }
2399                };
2400                VmResponse::VmDescriptor {
2401                    hypervisor: vm.hypervisor_kind(),
2402                    vm_fd,
2403                }
2404            }
2405            VmRequest::RegisterMemory { .. } => unreachable!(),
2406            VmRequest::UnregisterMemory { .. } => unreachable!(),
2407        }
2408    }
2409}
2410
2411/// Snapshot the VM to file at `snapshot_path`
2412fn do_snapshot(
2413    snapshot_path: PathBuf,
2414    kick_vcpus: impl Fn(VcpuControl),
2415    irq_handler_control: &Tube,
2416    device_control_tube: &Tube,
2417    vcpu_size: usize,
2418    snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
2419    compress_memory: bool,
2420    encrypt: bool,
2421    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2422    vm: &dyn Vm,
2423) -> anyhow::Result<()> {
2424    let snapshot_start = Instant::now();
2425
2426    let _vcpu_guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size)?;
2427    let _device_guard = DeviceSleepGuard::new(device_control_tube)?;
2428
2429    // We want to flush all pending IRQs to the interrupt controller. There are two cases:
2430    //
2431    // MSIs: these are directly delivered to the interrupt controller.
2432    // We must verify the handler thread cycles once to deliver these interrupts.
2433    //
2434    // Legacy interrupts: in the case of a split IRQ chip, these interrupts may
2435    // flow through the userspace IOAPIC. If the hypervisor does not support
2436    // irqfds (e.g. WHPX), a single iteration will only flush the IRQ to the
2437    // IOAPIC. The underlying MSI will be asserted at this point, but if the
2438    // IRQ handler doesn't run another iteration, it won't be delivered to the
2439    // interrupt controller. This is why we cycle the handler thread twice (doing so
2440    // ensures we process the underlying MSI).
2441    //
2442    // We can handle both of these cases by iterating until there are no tokens
2443    // serviced on the requested iteration. Note that in the legacy case, this
2444    // ensures at least two iterations.
2445    //
2446    // Note: within CrosVM, *all* interrupts are eventually converted into the
2447    // same mechanicism that MSIs use. This is why we say "underlying" MSI for
2448    // a legacy IRQ.
2449    {
2450        let mut flush_attempts = 0;
2451        loop {
2452            irq_handler_control
2453                .send(&IrqHandlerRequest::WakeAndNotifyIteration)
2454                .context("failed to send flush command to IRQ handler thread")?;
2455            let resp = irq_handler_control
2456                .recv()
2457                .context("failed to recv flush response from IRQ handler thread")?;
2458            match resp {
2459                IrqHandlerResponse::HandlerIterationComplete(tokens_serviced) => {
2460                    if tokens_serviced == 0 {
2461                        break;
2462                    }
2463                }
2464                _ => bail!("received unexpected reply from IRQ handler: {:?}", resp),
2465            }
2466            flush_attempts += 1;
2467            if flush_attempts > EXPECTED_MAX_IRQ_FLUSH_ITERATIONS {
2468                warn!(
2469                    "flushing IRQs for snapshot may be stalled after iteration {}, expected <= {}
2470                      iterations",
2471                    flush_attempts, EXPECTED_MAX_IRQ_FLUSH_ITERATIONS
2472                );
2473            }
2474        }
2475        info!("flushed IRQs in {} iterations", flush_attempts);
2476    }
2477    let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
2478
2479    // Snapshot hypervisor's paravirtualized clock.
2480    snapshot_writer.write_fragment("pvclock", &AnySnapshot::to_any(suspended_pvclock_state)?)?;
2481
2482    // Snapshot Vcpus
2483    info!("VCPUs snapshotting...");
2484    let (send_chan, recv_chan) = mpsc::channel();
2485    kick_vcpus(VcpuControl::Snapshot(
2486        snapshot_writer.add_namespace("vcpu")?,
2487        send_chan,
2488    ));
2489    // Validate all Vcpus snapshot successfully
2490    for _ in 0..vcpu_size {
2491        recv_chan
2492            .recv()
2493            .context("Failed to recv Vcpu snapshot response")?
2494            .context("Failed to snapshot Vcpu")?;
2495    }
2496    info!("VCPUs snapshotted.");
2497
2498    // Snapshot irqchip
2499    info!("Snapshotting irqchip...");
2500    let irqchip_snap = snapshot_irqchip()?;
2501    snapshot_writer
2502        .write_fragment("irqchip", &irqchip_snap)
2503        .context("Failed to write irqchip state")?;
2504    info!("Snapshotted irqchip.");
2505
2506    // Snapshot memory
2507    {
2508        let mem_snap_start = Instant::now();
2509        // Use 64MB chunks when writing the memory snapshot (if encryption is used).
2510        const MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES: usize = 1024 * 1024 * 64;
2511        // SAFETY:
2512        // VM & devices are stopped.
2513        let guest_memory_metadata = unsafe {
2514            vm.get_memory()
2515                .snapshot(
2516                    &mut snapshot_writer.raw_fragment_with_chunk_size(
2517                        "mem",
2518                        MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES,
2519                    )?,
2520                    compress_memory,
2521                )
2522                .context("failed to snapshot memory")?
2523        };
2524        snapshot_writer.write_fragment("mem_metadata", &guest_memory_metadata)?;
2525
2526        let mem_snap_duration_ms = mem_snap_start.elapsed().as_millis();
2527        info!(
2528            "snapshot: memory snapshotted {}MB in {}ms",
2529            vm.get_memory().memory_size() / 1024 / 1024,
2530            mem_snap_duration_ms
2531        );
2532        metrics::log_metric_with_details(
2533            metrics::MetricEventType::SnapshotSaveMemoryLatency,
2534            mem_snap_duration_ms as i64,
2535            &metrics_events::RecordDetails {},
2536        );
2537    }
2538    // Snapshot devices
2539    info!("Devices snapshotting...");
2540    device_control_tube
2541        .send(&DeviceControlCommand::SnapshotDevices { snapshot_writer })
2542        .context("send command to devices control socket")?;
2543    let resp: VmResponse = device_control_tube
2544        .recv()
2545        .context("receive from devices control socket")?;
2546    if !matches!(resp, VmResponse::Ok) {
2547        bail!("unexpected SnapshotDevices response: {resp}");
2548    }
2549    info!("Devices snapshotted.");
2550
2551    let snap_duration_ms = snapshot_start.elapsed().as_millis();
2552    info!(
2553        "snapshot: completed snapshot in {}ms; VM mem size: {}MB",
2554        snap_duration_ms,
2555        vm.get_memory().memory_size() / 1024 / 1024,
2556    );
2557    metrics::log_metric_with_details(
2558        metrics::MetricEventType::SnapshotSaveOverallLatency,
2559        snap_duration_ms as i64,
2560        &metrics_events::RecordDetails {},
2561    );
2562    Ok(())
2563}
2564
2565/// Restore the VM to the snapshot at `restore_path`.
2566///
2567/// Same as `VmRequest::execute` with a `VmRequest::Restore`. Exposed as a separate function
2568/// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
2569pub fn do_restore(
2570    restore_path: &Path,
2571    kick_vcpus: impl Fn(VcpuControl),
2572    kick_vcpu: impl Fn(VcpuControl, usize),
2573    irq_handler_control: &Tube,
2574    device_control_tube: &Tube,
2575    vcpu_size: usize,
2576    mut restore_irqchip: impl FnMut(AnySnapshot) -> anyhow::Result<()>,
2577    require_encrypted: bool,
2578    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2579    vm: &dyn Vm,
2580) -> anyhow::Result<()> {
2581    let restore_start = Instant::now();
2582    let _guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size);
2583    let _devices_guard = DeviceSleepGuard::new(device_control_tube)?;
2584
2585    let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
2586
2587    // Restore hypervisor's paravirtualized clock.
2588    *suspended_pvclock_state = snapshot_reader.read_fragment("pvclock")?;
2589
2590    // Restore IrqChip
2591    let irq_snapshot: AnySnapshot = snapshot_reader.read_fragment("irqchip")?;
2592    restore_irqchip(irq_snapshot)?;
2593
2594    // Restore Vcpu(s)
2595    let vcpu_snapshot_reader = snapshot_reader.namespace("vcpu")?;
2596    let vcpu_snapshot_count = vcpu_snapshot_reader.list_fragments()?.len();
2597    if vcpu_snapshot_count != vcpu_size {
2598        bail!(
2599            "bad cpu count in snapshot: expected={} got={}",
2600            vcpu_size,
2601            vcpu_snapshot_count,
2602        );
2603    }
2604    #[cfg(target_arch = "x86_64")]
2605    let host_tsc_reference_moment = {
2606        // SAFETY: rdtsc takes no arguments.
2607        unsafe { _rdtsc() }
2608    };
2609    let (send_chan, recv_chan) = mpsc::channel();
2610    for vcpu_id in 0..vcpu_size {
2611        kick_vcpu(
2612            VcpuControl::Restore(VcpuRestoreRequest {
2613                result_sender: send_chan.clone(),
2614                snapshot_reader: vcpu_snapshot_reader.clone(),
2615                #[cfg(target_arch = "x86_64")]
2616                host_tsc_reference_moment,
2617            }),
2618            vcpu_id,
2619        );
2620    }
2621    for _ in 0..vcpu_size {
2622        recv_chan
2623            .recv()
2624            .context("Failed to recv restore response")?
2625            .context("Failed to restore vcpu")?;
2626    }
2627
2628    // Restore Memory
2629    {
2630        let mem_restore_start = Instant::now();
2631        let guest_memory_metadata = snapshot_reader.read_fragment("mem_metadata")?;
2632        // SAFETY:
2633        // VM & devices are stopped.
2634        unsafe {
2635            vm.get_memory().restore(
2636                guest_memory_metadata,
2637                &mut snapshot_reader.raw_fragment("mem")?,
2638            )?
2639        };
2640        let mem_restore_duration_ms = mem_restore_start.elapsed().as_millis();
2641        info!(
2642            "snapshot: memory restored {}MB in {}ms",
2643            vm.get_memory().memory_size() / 1024 / 1024,
2644            mem_restore_duration_ms
2645        );
2646        metrics::log_metric_with_details(
2647            metrics::MetricEventType::SnapshotRestoreMemoryLatency,
2648            mem_restore_duration_ms as i64,
2649            &metrics_events::RecordDetails {},
2650        );
2651    }
2652    // Restore devices
2653    device_control_tube
2654        .send(&DeviceControlCommand::RestoreDevices {
2655            snapshot_reader: snapshot_reader.clone(),
2656        })
2657        .context("send restore devices command to devices control socket")?;
2658    let resp: VmResponse = device_control_tube
2659        .recv()
2660        .context("receive from devices control socket")?;
2661    if !matches!(resp, VmResponse::Ok) {
2662        bail!("unexpected RestoreDevices response: {resp}");
2663    }
2664
2665    // refresh the IRQ tokens.
2666    {
2667        irq_handler_control
2668            .send(&IrqHandlerRequest::RefreshIrqEventTokens)
2669            .context("failed to send refresh irq event token command to IRQ handler thread")?;
2670        let resp: IrqHandlerResponse = irq_handler_control
2671            .recv()
2672            .context("failed to recv refresh response from IRQ handler thread")?;
2673        if !matches!(resp, IrqHandlerResponse::IrqEventTokenRefreshComplete) {
2674            bail!(
2675                "received unexpected reply from IRQ handler thread: {:?}",
2676                resp
2677            );
2678        }
2679    }
2680
2681    let restore_duration_ms = restore_start.elapsed().as_millis();
2682    info!(
2683        "snapshot: completed restore in {}ms; mem size: {}",
2684        restore_duration_ms,
2685        vm.get_memory().memory_size(),
2686    );
2687
2688    metrics::log_metric_with_details(
2689        metrics::MetricEventType::SnapshotRestoreOverallLatency,
2690        restore_duration_ms as i64,
2691        &metrics_events::RecordDetails {},
2692    );
2693    Ok(())
2694}
2695
/// Alias for [`hypervisor::HypervisorKind`], the type of the `hypervisor` field of
/// [`VmResponse::VmDescriptor`].
pub type HypervisorKind = hypervisor::HypervisorKind;
2697
/// Indication of success or failure of a `VmRequest`.
///
/// Success is usually indicated by `VmResponse::Ok` unless there is data associated with the
/// response.
#[derive(Serialize, Deserialize, Debug)]
#[must_use]
pub enum VmResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution, reported as a `SysError`.
    Err(SysError),
    /// Indicates the request encountered some error during execution, reported as a free-form
    /// message.
    ErrString(String),
    /// The memory was registered into guest address space in memory slot number `slot`.
    RegisterMemory { slot: u32 },
    /// Variant of the register memory but with region_id.
    RegisterMemory2 { region_id: u64 },
    /// Results of balloon control commands.
    #[cfg(feature = "balloon")]
    BalloonStats {
        stats: balloon_control::BalloonStats,
        balloon_actual: u64,
    },
    /// Results of balloon WS-R command
    #[cfg(feature = "balloon")]
    BalloonWS {
        ws: balloon_control::BalloonWS,
        balloon_actual: u64,
    },
    /// Results of PCI hot plug
    #[cfg(feature = "pci-hotplug")]
    PciHotPlugResponse { bus: u8 },
    /// Results of usb control commands.
    UsbResponse(UsbControlResult),
    /// Results of gpu control commands.
    GpuResponse(GpuControlResult),
    /// Results of battery control commands.
    BatResponse(BatControlResult),
    /// Results of swap status command.
    SwapStatus(SwapStatus),
    /// Gets the state of Devices (sleep/wake)
    DevicesState(DevicesState),
    /// Map of the Vcpu PID/TIDs
    VcpuPidTidResponse {
        pid_tid_map: BTreeMap<usize, (u32, u32)>,
    },
    /// Reply to `VmRequest::GetVmDescriptor`: a cloned descriptor of the VM together with the
    /// kind of hypervisor behind it.
    VmDescriptor {
        hypervisor: HypervisorKind,
        vm_fd: SafeDescriptor,
    },
}
2748
2749impl Display for VmResponse {
2750    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2751        use self::VmResponse::*;
2752
2753        match self {
2754            Ok => write!(f, "ok"),
2755            Err(e) => write!(f, "error: {e}"),
2756            ErrString(e) => write!(f, "error: {e}"),
2757            RegisterMemory { slot } => write!(f, "memory registered in slot {slot}"),
2758            RegisterMemory2 { region_id } => {
2759                write!(f, "memory registered in region id {region_id}")
2760            }
2761            #[cfg(feature = "balloon")]
2762            VmResponse::BalloonStats {
2763                stats,
2764                balloon_actual,
2765            } => {
2766                write!(
2767                    f,
2768                    "stats: {}\nballoon_actual: {}",
2769                    serde_json::to_string_pretty(&stats)
2770                        .unwrap_or_else(|_| "invalid_response".to_string()),
2771                    balloon_actual
2772                )
2773            }
2774            #[cfg(feature = "balloon")]
2775            VmResponse::BalloonWS { ws, balloon_actual } => {
2776                write!(
2777                    f,
2778                    "ws: {}, balloon_actual: {}",
2779                    serde_json::to_string_pretty(&ws)
2780                        .unwrap_or_else(|_| "invalid_response".to_string()),
2781                    balloon_actual,
2782                )
2783            }
2784            UsbResponse(result) => write!(f, "usb control request get result {result:?}"),
2785            #[cfg(feature = "pci-hotplug")]
2786            PciHotPlugResponse { bus } => write!(f, "pci hotplug bus {bus:?}"),
2787            GpuResponse(result) => write!(f, "gpu control request result {result:?}"),
2788            BatResponse(result) => write!(f, "{result}"),
2789            SwapStatus(status) => {
2790                write!(
2791                    f,
2792                    "{}",
2793                    serde_json::to_string(&status)
2794                        .unwrap_or_else(|_| "invalid_response".to_string()),
2795                )
2796            }
2797            DevicesState(status) => write!(f, "devices status: {status:?}"),
2798            VcpuPidTidResponse { pid_tid_map } => write!(f, "vcpu pid tid map: {pid_tid_map:?}"),
2799            VmDescriptor { hypervisor, vm_fd } => {
2800                write!(f, "hypervisor: {hypervisor:?}, vm_fd: {vm_fd:?}")
2801            }
2802        }
2803    }
2804}
2805
/// Enum that allows remote control of a wait context (used between the Windows GpuDisplay & the
/// GPU worker).
#[derive(Serialize, Deserialize)]
pub enum ModifyWaitContext {
    /// Carries a descriptor, serialized via `with_as_descriptor`.
    ///
    /// NOTE(review): presumably a request to add this descriptor to the remote wait context
    /// (inferred from the variant name) — confirm against the receiving side.
    Add(#[serde(with = "with_as_descriptor")] Descriptor),
}
2812
/// Error type for virtio-iommu VFIO control operations.
///
/// NOTE(review): the code that produces these errors is outside this section; the variant docs
/// below only restate what the `#[error]` messages and payload types show.
#[sorted]
#[derive(Error, Debug)]
pub enum VirtioIOMMUVfioError {
    /// The control socket failed.
    #[error("socket failed")]
    SocketFailed,
    /// A response other than the expected one was received; carries that response.
    #[error("unexpected response: {0}")]
    UnexpectedResponse(VirtioIOMMUResponse),
    /// The command name was not recognized; carries the offending name.
    #[error("unknown command: `{0}`")]
    UnknownCommand(String),
    /// Carries a `VirtioIOMMUVfioResult` and displays it verbatim.
    #[error("{0}")]
    VfioControl(VirtioIOMMUVfioResult),
}
2825
/// VFIO-related commands carried by [`VirtioIOMMURequest::VfioCommand`].
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioCommand {
    /// Add the vfio device attached to virtio-iommu.
    VfioDeviceAdd {
        endpoint_addr: u32,
        wrapper_id: u32,
        #[serde(with = "with_as_descriptor")]
        container: File,
    },
    /// Delete the vfio device attached to virtio-iommu.
    VfioDeviceDel {
        endpoint_addr: u32,
    },
    /// Map a dma-buf into vfio iommu table
    VfioDmabufMap {
        region_id: VmMemoryRegionId,
        gpa: u64,
        size: u64,
        dma_buf: SafeDescriptor,
    },
    /// Unmap a dma-buf from vfio iommu table
    VfioDmabufUnmap(VmMemoryRegionId),
}
2849
/// Outcome of a virtio-iommu VFIO command, as carried by `VirtioIOMMUResponse::VfioResponse`.
///
/// The per-variant descriptions mirror the text produced by this type's `Display` impl.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioResult {
    /// The command completed successfully.
    Ok,
    /// Not in the PCI ranges of virtio-iommu.
    NotInPCIRanges,
    /// No VFIO container is available.
    NoAvailableContainer,
    /// No such VFIO device.
    NoSuchDevice,
    /// No such mapped dma-buf.
    NoSuchMappedDmabuf,
    /// The parameters were invalid.
    InvalidParam,
}
2859
2860impl Display for VirtioIOMMUVfioResult {
2861    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2862        use self::VirtioIOMMUVfioResult::*;
2863
2864        match self {
2865            Ok => write!(f, "successfully"),
2866            NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
2867            NoAvailableContainer => write!(f, "no available vfio container"),
2868            NoSuchDevice => write!(f, "no such a vfio device"),
2869            NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
2870            InvalidParam => write!(f, "invalid parameters"),
2871        }
2872    }
2873}
2874
/// A request to the virtio-iommu process to perform some operations.
///
/// Unless otherwise noted, each request should expect a `VirtioIOMMUResponse::Ok` to be received on
/// success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMURequest {
    /// Command for VFIO-related operations; the payload is a [`VirtioIOMMUVfioCommand`].
    VfioCommand(VirtioIOMMUVfioCommand),
}
2884
/// Indication of success or failure of a `VirtioIOMMURequest`.
///
/// Success is usually indicated by `VirtioIOMMUResponse::Ok` unless there is data associated with
/// the response.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution, reported as a `SysError`.
    Err(SysError),
    /// Results for Vfio commands.
    VfioResponse(VirtioIOMMUVfioResult),
}
2898
2899impl Display for VirtioIOMMUResponse {
2900    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2901        use self::VirtioIOMMUResponse::*;
2902        match self {
2903            Ok => write!(f, "ok"),
2904            Err(e) => write!(f, "error: {e}"),
2905            VfioResponse(result) => write!(
2906                f,
2907                "The vfio-related virtio-iommu request got result: {result:?}"
2908            ),
2909        }
2910    }
2911}
2912
2913/// Send VirtioIOMMURequest without waiting for the response
2914pub fn virtio_iommu_request_async(
2915    iommu_control_tube: &Tube,
2916    req: &VirtioIOMMURequest,
2917) -> VirtioIOMMUResponse {
2918    match iommu_control_tube.send(&req) {
2919        Ok(_) => VirtioIOMMUResponse::Ok,
2920        Err(e) => {
2921            error!("virtio-iommu socket send failed: {:?}", e);
2922            VirtioIOMMUResponse::Err(SysError::last())
2923        }
2924    }
2925}
2926
/// Result type returned by [`virtio_iommu_request`].
///
/// As written, `virtio_iommu_request` always returns `Ok`: failures are reported through
/// `VirtioIOMMUResponse::Err` inside the `Ok` value, not through the `()` error arm.
pub type VirtioIOMMURequestResult = std::result::Result<VirtioIOMMUResponse, ()>;
2928
2929/// Send VirtioIOMMURequest and wait to get the response
2930pub fn virtio_iommu_request(
2931    iommu_control_tube: &Tube,
2932    req: &VirtioIOMMURequest,
2933) -> VirtioIOMMURequestResult {
2934    let response = match virtio_iommu_request_async(iommu_control_tube, req) {
2935        VirtioIOMMUResponse::Ok => match iommu_control_tube.recv() {
2936            Ok(response) => response,
2937            Err(e) => {
2938                error!("virtio-iommu socket recv failed: {:?}", e);
2939                VirtioIOMMUResponse::Err(SysError::last())
2940            }
2941        },
2942        resp => resp,
2943    };
2944    Ok(response)
2945}
2946
#[cfg(test)]
mod tests {
    use anyhow::anyhow;

    use super::*;

    /// A `VmMemoryResponseError` must survive a JSON round trip with both its top-level message
    /// and its whole error chain intact.
    #[test]
    fn vm_memory_response_error_should_serialize_and_deserialize_correctly() {
        // Renders every link of the wrapped error's chain as a string.
        let chain_messages = |err: &VmMemoryResponseError| -> Vec<String> {
            err.0.chain().map(|cause| cause.to_string()).collect()
        };

        let original: VmMemoryResponseError = anyhow!("root cause")
            .context("context 1")
            .context("context 2")
            .into();
        let json = serde_json::to_vec(&original).expect("should serialize to json successfully");
        let round_tripped: VmMemoryResponseError =
            serde_json::from_slice(&json).expect("should deserialize from json successfully");

        assert_eq!(original.0.to_string(), round_tripped.0.to_string());
        assert_eq!(chain_messages(&original), chain_messages(&round_tripped));
    }

    /// Deserializing a flat error that carries no messages at all must be rejected.
    #[test]
    fn vm_memory_response_error_deserialization_should_handle_malformat_correctly() {
        let empty_chain = FlatVmMemoryResponseError(vec![]);
        let json =
            serde_json::to_vec(&empty_chain).expect("should serialize to json successfully");
        let outcome = serde_json::from_slice::<VmMemoryResponseError>(&json);
        outcome.expect_err("deserialize with 0 error messages should fail");
    }
}