vm_control/
lib.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Handles IPC for controlling the main VM process.
6//!
7//! The VM Control IPC protocol is synchronous, meaning that each `VmRequest` sent over a connection
8//! will receive a `VmResponse` for that request next time data is received over that connection.
9//!
10//! The wire message format is a little-endian C-struct of fixed size, along with a file descriptor
11//! if the request type expects one.
12
13pub mod api;
14
15mod device_id;
16pub use device_id::DeviceId;
17pub use device_id::PciId;
18pub use device_id::PlatformDeviceId;
19
20#[cfg(feature = "gdb")]
21pub mod gdb;
22pub mod gpu;
23
24use base::debug;
25#[cfg(any(target_os = "android", target_os = "linux"))]
26use base::linux::MemoryMappingBuilderUnix;
27#[cfg(any(target_os = "android", target_os = "linux"))]
28use base::sys::call_with_extended_max_files;
29#[cfg(any(target_os = "android", target_os = "linux"))]
30use base::MemoryMappingArena;
31#[cfg(windows)]
32use base::MemoryMappingBuilderWindows;
33use hypervisor::BalloonEvent;
34use hypervisor::MemCacheType;
35use hypervisor::MemRegion;
36use snapshot::AnySnapshot;
37
38#[cfg(feature = "balloon")]
39mod balloon_tube;
40pub mod client;
41pub mod sys;
42
43#[cfg(target_arch = "x86_64")]
44use std::arch::x86_64::_rdtsc;
45use std::collections::BTreeMap;
46use std::collections::BTreeSet;
47use std::collections::HashMap;
48use std::convert::TryInto;
49use std::fmt;
50use std::fmt::Display;
51use std::fs::File;
52use std::path::Path;
53use std::path::PathBuf;
54use std::result::Result as StdResult;
55use std::str::FromStr;
56use std::sync::mpsc;
57use std::sync::Arc;
58use std::time::Instant;
59
60use anyhow::bail;
61use anyhow::Context;
62use base::error;
63use base::info;
64use base::warn;
65use base::with_as_descriptor;
66use base::AsRawDescriptor;
67use base::Descriptor;
68use base::Error as SysError;
69use base::Event;
70use base::ExternalMapping;
71use base::IntoRawDescriptor;
72use base::MappedRegion;
73use base::MemoryMappingBuilder;
74use base::MmapError;
75use base::Protection;
76use base::Result;
77use base::SafeDescriptor;
78use base::SharedMemory;
79use base::Tube;
80use hypervisor::Datamatch;
81use hypervisor::IoEventAddress;
82use hypervisor::IrqRoute;
83use hypervisor::IrqSource;
84pub use hypervisor::MemSlot;
85use hypervisor::Vm;
86use hypervisor::VmCap;
87use libc::EINVAL;
88use libc::EIO;
89use libc::ENODEV;
90use libc::ENOTSUP;
91use libc::ERANGE;
92#[cfg(feature = "registered_events")]
93use protos::registered_events;
94use remain::sorted;
95use resources::Alloc;
96use resources::SystemAllocator;
97use rutabaga_gfx::RutabagaDescriptor;
98use rutabaga_gfx::RutabagaFromRawDescriptor;
99use rutabaga_gfx::RutabagaGralloc;
100use rutabaga_gfx::RutabagaMappedRegion;
101use rutabaga_gfx::RutabagaMesaHandle;
102use rutabaga_gfx::VulkanInfo;
103use serde::de::Error;
104use serde::Deserialize;
105use serde::Serialize;
106use snapshot::SnapshotReader;
107use snapshot::SnapshotWriter;
108use swap::SwapStatus;
109use sync::Mutex;
110#[cfg(any(target_os = "android", target_os = "linux"))]
111pub use sys::FsMappingRequest;
112#[cfg(windows)]
113pub use sys::InitialAudioSessionState;
114#[cfg(any(target_os = "android", target_os = "linux"))]
115pub use sys::VmMemoryMappingRequest;
116#[cfg(any(target_os = "android", target_os = "linux"))]
117pub use sys::VmMemoryMappingResponse;
118use thiserror::Error;
119pub use vm_control_product::GpuSendToMain;
120pub use vm_control_product::GpuSendToService;
121pub use vm_control_product::ServiceSendToGpu;
122use vm_memory::GuestAddress;
123
124#[cfg(feature = "balloon")]
125pub use crate::balloon_tube::BalloonControlCommand;
126#[cfg(feature = "balloon")]
127pub use crate::balloon_tube::BalloonTube;
128#[cfg(feature = "gdb")]
129pub use crate::gdb::VcpuDebug;
130#[cfg(feature = "gdb")]
131pub use crate::gdb::VcpuDebugStatus;
132#[cfg(feature = "gdb")]
133pub use crate::gdb::VcpuDebugStatusMessage;
134use crate::gpu::GpuControlCommand;
135use crate::gpu::GpuControlResult;
136
/// Control the state of a particular VM CPU.
///
/// Each variant is one message for a vCPU. Variants that report something back carry the
/// `mpsc::Sender` used for the reply (directly, or inside `VcpuRestoreRequest`).
#[derive(Clone, Debug)]
pub enum VcpuControl {
    /// Debugger request for the vCPU. Only compiled in with the `gdb` feature.
    #[cfg(feature = "gdb")]
    Debug(VcpuDebug),
    /// Carries a `VmRunMode` for the vCPU.
    // NOTE(review): presumably the mode the vCPU should switch to; the handler is outside this
    // view, confirm there.
    RunState(VmRunMode),
    // NOTE(review): presumably asks the vCPU thread to use real-time scheduling; the handler is
    // outside this view, confirm there.
    MakeRT,
    /// Request the current state of the vCPU. The result is sent back over the included channel.
    GetStates(mpsc::Sender<VmRunMode>),
    /// Request the vcpu write a snapshot of itself to the writer, then send a `Result` back over
    /// the channel after completion/failure.
    Snapshot(SnapshotWriter, mpsc::Sender<anyhow::Result<()>>),
    /// Request the vCPU restore itself as described by the `VcpuRestoreRequest`.
    Restore(VcpuRestoreRequest),
    // NOTE(review): the meaning and unit of the `u32` are not visible here; document them next
    // to the code that consumes this variant.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    Throttle(u32),
}
153
/// Request to restore a Vcpu from a given snapshot, and report the results
/// back via the provided channel.
#[derive(Clone, Debug)]
pub struct VcpuRestoreRequest {
    /// Channel on which the outcome of the restore is reported.
    pub result_sender: mpsc::Sender<anyhow::Result<()>>,
    /// Reader for the snapshot the vCPU is restored from.
    pub snapshot_reader: SnapshotReader,
    // NOTE(review): presumably a host TSC value sampled at a common reference point for the
    // restore; the consumer is outside this view, confirm there.
    #[cfg(target_arch = "x86_64")]
    pub host_tsc_reference_moment: u64,
}
163
/// Mode of execution for the VM.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum VmRunMode {
    /// The default run mode indicating the VCPUs are running.
    #[default]
    Running,
    /// Indicates that the VCPUs are suspending execution until the `Running` mode is set.
    Suspending,
    /// Indicates that the VM is exiting all processes.
    Exiting,
    /// Indicates that the VM is in a breakpoint waiting for the debugger to do continue.
    Breakpoint,
}

/// Formats the run mode as its lowercase name (`running`, `suspending`, `exiting`,
/// `breakpoint`).
impl Display for VmRunMode {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the fixed label first, then emit it with a single write.
        let label = match *self {
            VmRunMode::Running => "running",
            VmRunMode::Suspending => "suspending",
            VmRunMode::Exiting => "exiting",
            VmRunMode::Breakpoint => "breakpoint",
        };
        f.write_str(label)
    }
}
190
/// Trait for devices that get notification on specific PCI PME.
pub trait PmeNotify: Send {
    /// Notification hook; receives a 16-bit requester id. The default implementation does
    /// nothing.
    fn notify(&mut self, _requester_id: u16) {}
}
195
/// Hooks for power-management events and PME notification registration.
///
/// Every method has an empty default body, so an implementor only overrides the hooks it needs.
// NOTE(review): the per-method descriptions below are read off the method names (power button,
// sleep button, RTC, GPE, PME); confirm against an implementor.
pub trait PmResource {
    /// Power button event hook. Does nothing by default.
    fn pwrbtn_evt(&mut self) {}
    /// Sleep button event hook. Does nothing by default.
    fn slpbtn_evt(&mut self) {}
    /// RTC event hook; receives an `Event` named `_clear_evt`. Does nothing by default.
    fn rtc_evt(&mut self, _clear_evt: Event) {}
    /// GPE event hook for GPE number `_gpe`, with an optional `_clear_evt`. Does nothing by
    /// default.
    fn gpe_evt(&mut self, _gpe: u32, _clear_evt: Option<Event>) {}
    /// PME event hook; receives a 16-bit requester id. Does nothing by default.
    fn pme_evt(&mut self, _requester_id: u16) {}
    /// Registers a `PmeNotify` device for the given bus number. Does nothing by default.
    fn register_pme_notify_dev(&mut self, _bus: u8, _notify_dev: Arc<Mutex<dyn PmeNotify>>) {}
}
204
/// The maximum number of devices that can be listed in one `UsbControlCommand`.
///
/// This is the length of the fixed-size arrays in `UsbControlCommand::ListDevice` and
/// `UsbControlResult::Devices`.
///
/// This value was set to be equal to `xhci_regs::MAX_PORTS` for convenience, but it is not
/// necessary for correctness. Importing that value directly would be overkill because it would
/// require adding a big dependency for a single const.
pub const USB_CONTROL_MAX_PORTS: usize = 16;
211
/// Control commands for a disk.
#[derive(Serialize, Deserialize, Debug)]
pub enum DiskControlCommand {
    /// Resize a disk to `new_size` in bytes.
    Resize { new_size: u64 },
}
217
218impl Display for DiskControlCommand {
219    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
220        use self::DiskControlCommand::*;
221
222        match self {
223            Resize { new_size } => write!(f, "disk_resize {new_size}"),
224        }
225    }
226}
227
/// Result of a disk control operation.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum DiskControlResult {
    /// The operation succeeded.
    Ok,
    /// The operation failed with the contained system error.
    Err(SysError),
}
233
/// Net control commands for adding and removing tap devices.
///
/// Only available with the `pci-hotplug` feature.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug)]
pub enum NetControlCommand {
    /// Add a tap device.
    // NOTE(review): presumably the string is the tap interface name; confirm against the
    // handler.
    AddTap(String),
    /// Remove a tap device.
    // NOTE(review): presumably the `u8` is the PCI bus number reported by
    // `PciControlResult::AddOk`; confirm against the handler.
    RemoveTap(u8),
}
241
/// Control commands for USB devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum UsbControlCommand {
    /// Attach the USB device represented by `file`.
    AttachDevice {
        /// Open file for the device; serialized with the `with_as_descriptor` serde helper.
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Attach the security key represented by `file`.
    AttachSecurityKey {
        /// Open file for the security key; serialized with the `with_as_descriptor` serde
        /// helper.
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Detach the device on `port`.
    DetachDevice {
        port: u8,
    },
    /// List devices for up to `USB_CONTROL_MAX_PORTS` ports.
    // NOTE(review): how unused entries of `ports` are encoded is not visible here; confirm
    // against the handler.
    ListDevice {
        ports: [u8; USB_CONTROL_MAX_PORTS],
    },
}
259
/// One entry of a USB device listing: a port number plus the device's vendor and product ids.
///
/// The `Default` value has `port == 0`, which `valid()` reports as not valid.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)]
pub struct UsbControlAttachedDevice {
    /// Port number; `0` marks an entry that is not valid.
    pub port: u8,
    /// Vendor id of the device.
    pub vendor_id: u16,
    /// Product id of the device.
    pub product_id: u16,
}
266
267impl UsbControlAttachedDevice {
268    pub fn valid(self) -> bool {
269        self.port != 0
270    }
271}
272
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
/// Result for hotplug and removal of PCI device.
pub enum PciControlResult {
    /// The device was added; `bus` is the bus number reported for it.
    AddOk { bus: u8 },
    /// The operation failed; the string describes the error.
    ErrString(String),
    /// The device was removed.
    RemoveOk,
}
282
/// Formats the result as `add_ok <bus>`, `error: <message>` or `remove_ok`.
#[cfg(feature = "pci-hotplug")]
impl Display for PciControlResult {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::AddOk { bus } => write!(f, "add_ok {bus}"),
            Self::ErrString(message) => write!(f, "error: {message}"),
            Self::RemoveOk => f.write_str("remove_ok"),
        }
    }
}
295
/// Result of a `UsbControlCommand`.
// NOTE(review): which command produces which variant is decided by the handler, which is outside
// this view; the variant descriptions below are read off the variant names.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum UsbControlResult {
    /// Success; carries a port number.
    Ok { port: u8 },
    /// No port was available.
    NoAvailablePort,
    /// The device does not exist.
    NoSuchDevice,
    /// The port does not exist.
    NoSuchPort,
    /// The device could not be opened.
    FailedToOpenDevice,
    /// Device listing. Entries for which `UsbControlAttachedDevice::valid` is false are skipped
    /// by the `Display` implementation.
    Devices([UsbControlAttachedDevice; USB_CONTROL_MAX_PORTS]),
    /// The host device could not be initialized.
    FailedToInitHostDevice,
}
306
307impl Display for UsbControlResult {
308    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
309        use self::UsbControlResult::*;
310
311        match self {
312            UsbControlResult::Ok { port } => write!(f, "ok {port}"),
313            NoAvailablePort => write!(f, "no_available_port"),
314            NoSuchDevice => write!(f, "no_such_device"),
315            NoSuchPort => write!(f, "no_such_port"),
316            FailedToOpenDevice => write!(f, "failed_to_open_device"),
317            Devices(devices) => {
318                write!(f, "devices")?;
319                for d in devices.iter().filter(|d| d.valid()) {
320                    write!(f, " {} {:04x} {:04x}", d.port, d.vendor_id, d.product_id)?;
321                }
322                std::result::Result::Ok(())
323            }
324            FailedToInitHostDevice => write!(f, "failed_to_init_host_device"),
325        }
326    }
327}
328
/// Commands for snapshot feature
#[derive(Serialize, Deserialize, Debug)]
pub enum SnapshotCommand {
    /// Take a snapshot.
    Take {
        /// Path the snapshot is written to.
        snapshot_path: PathBuf,
        /// Whether memory should be compressed.
        // NOTE(review): the compression and encryption details are decided by the snapshot
        // code outside this view.
        compress_memory: bool,
        /// Whether the snapshot should be encrypted.
        encrypt: bool,
    },
}
338
/// Commands for actions on devices and the devices control thread.
// NOTE(review): the variant descriptions below are read off the variant names and payload types;
// the handler is outside this view.
#[derive(Serialize, Deserialize, Debug)]
pub enum DeviceControlCommand {
    /// Put the devices to sleep.
    SleepDevices,
    /// Wake the devices.
    WakeDevices,
    /// Snapshot the devices using the given writer.
    SnapshotDevices { snapshot_writer: SnapshotWriter },
    /// Restore the devices using the given reader.
    RestoreDevices { snapshot_reader: SnapshotReader },
    /// Query the state of the devices.
    GetDevicesState,
    /// Exit the devices control thread.
    Exit,
}
349
/// Commands to control the IRQ handler thread.
///
/// Replies, where there is one, are `IrqHandlerResponse` values.
#[derive(Serialize, Deserialize)]
pub enum IrqHandlerRequest {
    /// Hands additional IRQ control tubes to the handler.
    ///
    /// No response is sent for this command.
    AddIrqControlTubes(Vec<Tube>),
    /// Refreshes the set of event tokens (Events) from the Irqchip that the IRQ
    /// handler waits on to forward IRQs to their final destination (e.g. via
    /// Irqchip::service_irq_event).
    ///
    /// If the set of tokens exposed by the Irqchip changes while the VM is
    /// running (such as for snapshot restore), this command must be sent
    /// otherwise the VM will not receive IRQs as expected.
    RefreshIrqEventTokens,
    /// Wakes the handler and asks it to report on the iteration that services this request.
    // NOTE(review): presumably answered with `IrqHandlerResponse::HandlerIterationComplete`,
    // whose documentation refers to this request; confirm against the handler.
    WakeAndNotifyIteration,
    /// Asks the handler to exit.
    ///
    /// No response is sent for this command.
    Exit,
}
367
// NOTE(review): the use of this constant is outside this view; presumably it is the number of
// IRQ handler iterations a flush is expected to stay under. Confirm at the use site.
const EXPECTED_MAX_IRQ_FLUSH_ITERATIONS: usize = 100;
369
/// Response for [IrqHandlerRequest].
///
/// Not every request is answered; see the per-variant notes on [IrqHandlerRequest].
#[derive(Serialize, Deserialize, Debug)]
pub enum IrqHandlerResponse {
    /// Sent when the IRQ event tokens have been refreshed.
    IrqEventTokenRefreshComplete,
    /// Specifies the number of tokens serviced in the requested iteration
    /// (less the token for the `WakeAndNotifyIteration` request).
    HandlerIterationComplete(usize),
}
379
/// Source of a `VmMemoryRequest::RegisterMemory` mapping.
#[derive(Serialize, Deserialize)]
pub enum VmMemorySource {
    /// Register shared memory represented by the given descriptor.
    /// On Windows, descriptor MUST be a mapping handle.
    SharedMemory(SharedMemory),
    /// Register a file mapping from the given descriptor.
    Descriptor {
        /// File descriptor to map.
        descriptor: SafeDescriptor,
        /// Offset within the file in bytes.
        offset: u64,
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register memory mapped by Vulkano.
    ///
    /// `VmMemorySource::map` imports and maps this through `RutabagaGralloc::import_and_map`.
    Vulkan {
        /// OS handle of the memory; becomes the `os_handle` of the `RutabagaMesaHandle`.
        descriptor: SafeDescriptor,
        /// Handle type stored in the `RutabagaMesaHandle`.
        handle_type: u32,
        /// Memory index stored in the `VulkanInfo`.
        memory_idx: u32,
        /// Device UUID stored in the `rutabaga_gfx::DeviceId` of the `VulkanInfo`.
        device_uuid: [u8; 16],
        /// Driver UUID stored in the `rutabaga_gfx::DeviceId` of the `VulkanInfo`.
        driver_uuid: [u8; 16],
        /// Size passed to `import_and_map` and reported as the size of the mapping.
        size: u64,
    },
    /// Register the current rutabaga external mapping.
    ///
    /// `ptr` and `size` are wrapped as-is in a `base::ExternalMapping`.
    ExternalMapping { ptr: u64, size: u64 },
}
407
408// The following are wrappers to avoid base dependencies in the rutabaga crate
409fn to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor {
410    // SAFETY:
411    // Safe because we own the SafeDescriptor at this point.
412    unsafe { RutabagaDescriptor::from_raw_descriptor(s.into_raw_descriptor()) }
413}
414
/// Adapter that exposes a rutabaga `RutabagaMappedRegion` through base's `MappedRegion` trait.
struct RutabagaMemoryRegion {
    /// The wrapped rutabaga mapping; owned by this adapter.
    region: Box<dyn RutabagaMappedRegion>,
}
418
419impl RutabagaMemoryRegion {
420    pub fn new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion {
421        RutabagaMemoryRegion { region }
422    }
423}
424
// SAFETY:
//
// Self guarantees `ptr`..`ptr+size` is an mmaped region owned by this object that
// can't be unmapped during the `MappedRegion`'s lifetime.
unsafe impl MappedRegion for RutabagaMemoryRegion {
    /// Returns the start pointer reported by the wrapped rutabaga region.
    fn as_ptr(&self) -> *mut u8 {
        self.region.as_ptr()
    }

    /// Returns the size reported by the wrapped rutabaga region.
    fn size(&self) -> usize {
        self.region.size()
    }
}
438
439impl Display for VmMemorySource {
440    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
441        use self::VmMemorySource::*;
442
443        match self {
444            SharedMemory(..) => write!(f, "VmMemorySource::SharedMemory"),
445            Descriptor { .. } => write!(f, "VmMemorySource::Descriptor"),
446            Vulkan { .. } => write!(f, "VmMemorySource::Vulkan"),
447            ExternalMapping { .. } => write!(f, "VmMemorySource::ExternalMapping"),
448        }
449    }
450}
451
452impl VmMemorySource {
453    /// Map the resource and return its mapping and size in bytes.
454    fn map(
455        self,
456        gralloc: &mut RutabagaGralloc,
457        prot: Protection,
458    ) -> anyhow::Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
459        let (mem_region, size, descriptor) = match self {
460            VmMemorySource::Descriptor {
461                descriptor,
462                offset,
463                size,
464            } => (
465                map_descriptor(&descriptor, offset, size, prot)?,
466                size,
467                Some(descriptor),
468            ),
469
470            VmMemorySource::SharedMemory(shm) => {
471                (map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
472            }
473            VmMemorySource::Vulkan {
474                descriptor,
475                handle_type,
476                memory_idx,
477                device_uuid,
478                driver_uuid,
479                size,
480            } => {
481                let device_id = rutabaga_gfx::DeviceId {
482                    device_uuid,
483                    driver_uuid,
484                };
485                let mapped_region = gralloc
486                    .import_and_map(
487                        RutabagaMesaHandle {
488                            os_handle: to_rutabaga_desciptor(descriptor),
489                            handle_type,
490                        },
491                        VulkanInfo {
492                            memory_idx,
493                            device_id,
494                        },
495                        size,
496                    )
497                    .with_context(|| {
498                        format!(
499                            "gralloc failed to import and map, handle type: {handle_type}, memory index {memory_idx}, \
500                             size: {size}"
501                        )
502                    })?;
503                let mapped_region: Box<dyn MappedRegion> =
504                    Box::new(RutabagaMemoryRegion::new(mapped_region));
505                (mapped_region, size, None)
506            }
507            VmMemorySource::ExternalMapping { ptr, size } => {
508                let mapped_region: Box<dyn MappedRegion> = Box::new(ExternalMapping {
509                    ptr,
510                    size: size as usize,
511                });
512                (mapped_region, size, None)
513            }
514        };
515        Ok((mem_region, size, descriptor))
516    }
517}
518
/// Destination of a `VmMemoryRequest::RegisterMemory` mapping in guest address space.
///
/// `allocate` turns a destination into a concrete `GuestAddress`.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryDestination {
    /// Map at an offset within an existing PCI BAR allocation.
    ExistingAllocation { allocation: Alloc, offset: u64 },
    /// Map at the specified guest physical address.
    GuestPhysicalAddress(u64),
}
527
528impl VmMemoryDestination {
529    /// Allocate and return the guest address of a memory mapping destination.
530    pub fn allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress> {
531        let addr = match self {
532            VmMemoryDestination::ExistingAllocation { allocation, offset } => allocator
533                .mmio_allocator_any()
534                .address_from_pci_offset(allocation, offset, size)
535                .map_err(|_e| SysError::new(EINVAL))?,
536            VmMemoryDestination::GuestPhysicalAddress(gpa) => gpa,
537        };
538        Ok(GuestAddress(addr))
539    }
540}
541
/// Request to register or unregister an ioevent.
#[derive(Serialize, Deserialize)]
pub struct IoEventUpdateRequest {
    /// The event the request applies to.
    pub event: Event,
    /// Raw guest address of the ioevent.
    pub addr: u64,
    /// Data match condition for the ioevent.
    pub datamatch: Datamatch,
    /// Selects between registering and unregistering.
    // NOTE(review): presumably `true` registers and `false` unregisters; the handler is outside
    // this view, confirm there.
    pub register: bool,
}
550
/// Request to mmap a file to a shared memory.
/// This request is supposed to follow a `VmMemoryRequest::MmapAndRegisterMemory` request that
/// contains `SharedMemory` that `file` is mmaped to.
///
/// The handler passes the fields to `MemoryMappingArena::add_fd_mapping` as
/// `(mem_offset, length, &file, file_offset, Protection::read())`.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[derive(Serialize, Deserialize)]
pub struct VmMemoryFileMapping {
    /// File to map; serialized with the `with_as_descriptor` serde helper.
    #[serde(with = "with_as_descriptor")]
    pub file: File,
    /// Length of the mapping.
    pub length: usize,
    /// Offset within the shared memory arena at which the file is mapped.
    pub mem_offset: usize,
    /// Offset within `file` at which the mapping starts.
    pub file_offset: u64,
}
563
/// Requests that change how memory is mapped into the guest.
///
/// A request is carried out by `VmMemoryRequest::execute`, which reports the outcome as a
/// `VmMemoryResponse`.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryRequest {
    /// Prepare a shared memory region to make later operations more efficient. This
    /// may be a no-op depending on underlying platform support.
    PrepareSharedMemoryRegion { alloc: Alloc, cache: MemCacheType },
    /// Register a memory to be mapped to the guest.
    RegisterMemory {
        /// Source of the memory to register (mapped file descriptor, shared memory region, etc.)
        source: VmMemorySource,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Protection for the mapping. When the region is added through `add_memory_region`,
        /// it is registered read-only exactly when this equals `Protection::read()`.
        prot: Protection,
        /// Cache attribute for guest memory setting
        cache: MemCacheType,
    },
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Call mmap to `shm` and register the memory region as a read-only guest memory.
    /// This request is followed by an array of `VmMemoryFileMapping` with length
    /// `num_file_mappings`
    MmapAndRegisterMemory {
        /// Shared memory the mapping arena is built from; the file mappings that follow are
        /// added into that arena.
        shm: SharedMemory,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Length of the array of `VmMemoryFileMapping` that follows.
        num_file_mappings: usize,
    },
    /// Call hypervisor to free the given memory range.
    DynamicallyFreeMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Call hypervisor to reclaim a priorly freed memory range.
    DynamicallyReclaimMemoryRanges { ranges: Vec<(GuestAddress, u64)> },
    /// Balloon allocation/deallocation target reached.
    BalloonTargetReached { size: u64 },
    /// Unregister the given memory slot that was previously registered with `RegisterMemory`.
    UnregisterMemory(VmMemoryRegionId),
    /// Register an eventfd with raw guest memory address.
    IoEventRaw(IoEventUpdateRequest),
}
603
/// Struct for managing `VmMemoryRequest`s IOMMU related state.
pub struct VmMemoryRequestIommuClient {
    /// Tube used to send `VirtioIOMMURequest`s to the viommu.
    tube: Arc<Mutex<Tube>>,
    /// Regions for which a `VfioDmabufMap` request was acknowledged with success.
    registered_memory: BTreeSet<VmMemoryRegionId>,
}
609
610impl VmMemoryRequestIommuClient {
611    /// Constructs `VmMemoryRequestIommuClient` from a tube for communication with the viommu.
612    pub fn new(tube: Arc<Mutex<Tube>>) -> Self {
613        Self {
614            tube,
615            registered_memory: BTreeSet::new(),
616        }
617    }
618}
619
/// Bookkeeping entry for one registered memory region, keyed by `VmMemoryRegionId` in
/// `VmMemoryRegionState`.
enum RegisteredMemory {
    /// An fd mapping added at a fixed offset inside a prepared region's slot; recorded by
    /// `try_map_to_prepared_region`.
    FixedMapping {
        /// Slot of the prepared region.
        slot: MemSlot,
        /// Offset of the mapping within the prepared region.
        offset: usize,
        /// Size of the mapping.
        size: usize,
    },
    /// A region that was added as its own memory slot by `RegisterMemory`.
    DynamicMapping {
        /// Slot returned by `add_memory_region`.
        slot: MemSlot,
    },
}
630
/// A prepared shared memory region, as stored in `VmMemoryRegionState` per `Alloc`.
pub struct VmMappedMemoryRegion {
    /// Guest address of the start of the region; fixed mappings are placed at this address plus
    /// their destination offset.
    guest_address: GuestAddress,
    /// Memory slot of the region, used for `add_fd_mapping`.
    slot: MemSlot,
}
635
/// State shared between `VmMemoryRequest`s: the prepared regions and every registered mapping.
#[derive(Default)]
pub struct VmMemoryRegionState {
    /// Regions prepared by `PrepareSharedMemoryRegion`, keyed by their allocation.
    mapped_regions: HashMap<Alloc, VmMappedMemoryRegion>,
    /// Mappings registered so far, keyed by region id.
    registered_memory: BTreeMap<VmMemoryRegionId, RegisteredMemory>,
}
641
642fn try_map_to_prepared_region(
643    vm: &dyn Vm,
644    region_state: &mut VmMemoryRegionState,
645    source: &VmMemorySource,
646    dest: &VmMemoryDestination,
647    prot: &Protection,
648) -> Option<VmMemoryResponse> {
649    let VmMemoryDestination::ExistingAllocation {
650        allocation,
651        offset: dest_offset,
652    } = dest
653    else {
654        return None;
655    };
656
657    let VmMappedMemoryRegion {
658        guest_address,
659        slot,
660    } = region_state.mapped_regions.get(allocation)?;
661
662    let (descriptor, file_offset, size) = match source {
663        VmMemorySource::Descriptor {
664            descriptor,
665            offset,
666            size,
667        } => (
668            Descriptor(descriptor.as_raw_descriptor()),
669            *offset,
670            *size as usize,
671        ),
672        VmMemorySource::SharedMemory(shm) => {
673            let size = shm.size() as usize;
674            (Descriptor(shm.as_raw_descriptor()), 0, size)
675        }
676        _ => {
677            let error = anyhow::anyhow!(
678                "source {} is not compatible with fixed mapping into prepared memory region",
679                source
680            );
681            return Some(VmMemoryResponse::Err(error.into()));
682        }
683    };
684    if let Err(err) = vm
685        .add_fd_mapping(
686            *slot,
687            *dest_offset as usize,
688            size,
689            &descriptor,
690            file_offset,
691            *prot,
692        )
693        .context("failed to add fd mapping when trying to map to prepared region")
694    {
695        return Some(VmMemoryResponse::Err(err.into()));
696    }
697
698    let guest_address = GuestAddress(guest_address.0 + dest_offset);
699    let region_id = VmMemoryRegionId(guest_address);
700    region_state.registered_memory.insert(
701        region_id,
702        RegisteredMemory::FixedMapping {
703            slot: *slot,
704            offset: *dest_offset as usize,
705            size,
706        },
707    );
708
709    Some(VmMemoryResponse::RegisterMemory {
710        region_id,
711        slot: *slot,
712    })
713}
714
715impl VmMemoryRequest {
716    /// Executes this request on the given Vm.
717    ///
718    /// # Arguments
719    /// * `vm` - The `Vm` to perform the request on.
720    /// * `allocator` - Used to allocate addresses.
721    ///
722    /// This does not return a result, instead encapsulating the success or failure in a
723    /// `VmMemoryResponse` with the intended purpose of sending the response back over the socket
724    /// that received this `VmMemoryResponse`.
725    pub fn execute(
726        self,
727        #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube,
728        vm: &dyn Vm,
729        sys_allocator: &mut SystemAllocator,
730        gralloc: &mut RutabagaGralloc,
731        iommu_client: Option<&mut VmMemoryRequestIommuClient>,
732        region_state: &mut VmMemoryRegionState,
733    ) -> VmMemoryResponse {
734        use self::VmMemoryRequest::*;
735        match self {
736            PrepareSharedMemoryRegion { alloc, cache } => {
737                // Currently the iommu_client is only used by virtio-gpu when used alongside GPU
738                // pci-passthrough.
739                //
740                // TODO(b/323368701): Make compatible with iommu_client by ensuring that
741                // VirtioIOMMUVfioCommand::VfioDmabufMap is submitted for both dynamic mappings and
742                // fixed mappings (i.e. whether or not try_map_to_prepared_region succeeds in
743                // RegisterMemory case below).
744                assert!(iommu_client.is_none());
745
746                if !sys::should_prepare_memory_region() {
747                    return VmMemoryResponse::Ok;
748                }
749
750                match sys::prepare_shared_memory_region(vm, sys_allocator, alloc, cache)
751                    .context("failed to prepare shared memory region")
752                {
753                    Ok(region) => {
754                        region_state.mapped_regions.insert(alloc, region);
755                        VmMemoryResponse::Ok
756                    }
757                    Err(e) => VmMemoryResponse::Err(e.into()),
758                }
759            }
760            RegisterMemory {
761                source,
762                dest,
763                prot,
764                cache,
765            } => {
766                if let Some(resp) =
767                    try_map_to_prepared_region(vm, region_state, &source, &dest, &prot)
768                {
769                    return resp;
770                }
771
772                // Correct on Windows because callers of this IPC guarantee descriptor is a mapping
773                // handle.
774                let (mapped_region, size, descriptor) =
775                    match source.map(gralloc, prot).context("gralloc mapping") {
776                        Ok((region, size, descriptor)) => (region, size, descriptor),
777                        Err(e) => return VmMemoryResponse::Err(e.into()),
778                    };
779
780                let guest_addr = match dest
781                    .allocate(sys_allocator, size)
782                    .context("VM memory destination allocation fails")
783                {
784                    Ok(addr) => addr,
785                    Err(e) => return VmMemoryResponse::Err(e.into()),
786                };
787
788                let slot = match vm
789                    .add_memory_region(
790                        guest_addr,
791                        mapped_region,
792                        prot == Protection::read(),
793                        false,
794                        cache,
795                    )
796                    .context("failed to add memory region when registering memory")
797                {
798                    Ok(slot) => slot,
799                    Err(e) => return VmMemoryResponse::Err(e.into()),
800                };
801
802                let region_id = VmMemoryRegionId(guest_addr);
803                if let (Some(descriptor), Some(iommu_client)) = (descriptor, iommu_client) {
804                    let request =
805                        VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
806                            region_id,
807                            gpa: guest_addr.0,
808                            size,
809                            dma_buf: descriptor,
810                        });
811
812                    match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
813                        Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
814                        resp => {
815                            let error = anyhow::anyhow!(
816                                "Unexpected virtio-iommu message response when registering memory: \
817                                 {:?}", resp);
818                            if let Err(e) = vm.remove_memory_region(slot) {
819                                // There is nothing we can do here, so we just log a warning
820                                // message.
821                                warn!("failed to remove memory region: {:?}", e);
822                            }
823                            return VmMemoryResponse::Err(error.into());
824                        }
825                    };
826
827                    iommu_client.registered_memory.insert(region_id);
828                }
829
830                region_state
831                    .registered_memory
832                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
833                VmMemoryResponse::RegisterMemory { region_id, slot }
834            }
835            #[cfg(any(target_os = "android", target_os = "linux"))]
836            MmapAndRegisterMemory {
837                shm,
838                dest,
839                num_file_mappings,
840            } => {
841                // Define a callback to be executed with extended limit of file counts.
842                // It recieves `num_file_mappings` FDs and call `add_fd_mapping` for each.
843                let callback = || {
844                    let mem = match MemoryMappingBuilder::new(shm.size() as usize)
845                        .from_shared_memory(&shm)
846                        .build()
847                        .context("failed to build MemoryMapping from shared memory")
848                    {
849                        Ok(mem) => mem,
850                        Err(e) => return Err(VmMemoryResponse::Err(e.into())),
851                    };
852                    let mut mmap_arena = MemoryMappingArena::from(mem);
853
854                    // If `num_file_mappings` exceeds `SCM_MAX_FD`, `file_mappings` are sent in
855                    // chunks of length `SCM_MAX_FD`.
856                    let mut file_mappings = Vec::with_capacity(num_file_mappings);
857                    let mut read = 0;
858                    while read < num_file_mappings {
859                        let len = std::cmp::min(num_file_mappings - read, base::unix::SCM_MAX_FD);
860                        let mps: Vec<VmMemoryFileMapping> = match tube
861                            .recv_with_max_fds(len)
862                            .with_context(|| format!("get {num_file_mappings} FDs to be mapped"))
863                        {
864                            Ok(m) => m,
865                            Err(e) => return Err(VmMemoryResponse::Err(e.into())),
866                        };
867                        file_mappings.extend(mps.into_iter());
868                        read += len;
869                    }
870
871                    for VmMemoryFileMapping {
872                        mem_offset,
873                        length,
874                        file,
875                        file_offset,
876                    } in file_mappings
877                    {
878                        if let Err(e) = mmap_arena
879                            .add_fd_mapping(
880                                mem_offset,
881                                length,
882                                &file,
883                                file_offset,
884                                Protection::read(),
885                            )
886                            .context(
887                                "failed to add fd mapping when handling mmap and register memory",
888                            )
889                        {
890                            return Err(VmMemoryResponse::Err(e.into()));
891                        }
892                    }
893                    Ok(mmap_arena)
894                };
895                let mmap_arena = match call_with_extended_max_files(callback)
896                    .context("failed to set max count of file descriptors")
897                {
898                    Ok(Ok(m)) => m,
899                    Ok(Err(e)) => {
900                        return e;
901                    }
902                    Err(e) => {
903                        error!("{e:?}");
904                        return VmMemoryResponse::Err(e.into());
905                    }
906                };
907
908                let size = shm.size();
909                let guest_addr = match dest.allocate(sys_allocator, size).context(
910                    "VM memory destination allocation fails when handling mmap and register memory",
911                ) {
912                    Ok(addr) => addr,
913                    Err(e) => return VmMemoryResponse::Err(e.into()),
914                };
915
916                let slot = match vm
917                    .add_memory_region(
918                        guest_addr,
919                        Box::new(mmap_arena),
920                        true,
921                        false,
922                        MemCacheType::CacheCoherent,
923                    )
924                    .context("failed to add memory region when handling mmap and register memory")
925                {
926                    Ok(slot) => slot,
927                    Err(e) => return VmMemoryResponse::Err(e.into()),
928                };
929
930                let region_id = VmMemoryRegionId(guest_addr);
931
932                region_state
933                    .registered_memory
934                    .insert(region_id, RegisteredMemory::DynamicMapping { slot });
935
936                VmMemoryResponse::RegisterMemory { region_id, slot }
937            }
938            UnregisterMemory(id) => match region_state.registered_memory.remove(&id) {
939                Some(RegisteredMemory::DynamicMapping { slot }) => match vm
940                    .remove_memory_region(slot)
941                    .context(
942                        "failed to remove memory region when unregistering dynamic mapping memory",
943                    ) {
944                    Ok(_) => {
945                        if let Some(iommu_client) = iommu_client {
946                            if iommu_client.registered_memory.remove(&id) {
947                                let request = VirtioIOMMURequest::VfioCommand(
948                                    VirtioIOMMUVfioCommand::VfioDmabufUnmap(id),
949                                );
950
951                                match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
952                                    Ok(VirtioIOMMUResponse::VfioResponse(
953                                        VirtioIOMMUVfioResult::Ok,
954                                    )) => VmMemoryResponse::Ok,
955                                    resp => {
956                                        let error = anyhow::anyhow!(
957                                            "Unexpected virtio-iommu message response when \
958                                             unregistering memory: {:?}",
959                                            resp
960                                        );
961                                        VmMemoryResponse::Err(error.into())
962                                    }
963                                }
964                            } else {
965                                VmMemoryResponse::Ok
966                            }
967                        } else {
968                            VmMemoryResponse::Ok
969                        }
970                    }
971                    Err(e) => VmMemoryResponse::Err(e.into()),
972                },
973                Some(RegisteredMemory::FixedMapping { slot, offset, size }) => {
974                    match vm.remove_mapping(slot, offset, size).context(
975                        "failed to remove memory mapping when unregistering fixed mapping memory",
976                    ) {
977                        Ok(()) => VmMemoryResponse::Ok,
978                        Err(e) => VmMemoryResponse::Err(e.into()),
979                    }
980                }
981                None => {
982                    let error =
983                        anyhow::anyhow!("can't find the memory region when unregistering memory");
984                    VmMemoryResponse::Err(error.into())
985                }
986            },
987            DynamicallyFreeMemoryRanges { ranges } => {
988                let mut r = VmMemoryResponse::Ok;
989                for (guest_address, size) in ranges {
990                    match vm
991                        .handle_balloon_event(BalloonEvent::Inflate(MemRegion {
992                            guest_address,
993                            size,
994                        }))
995                        .context(
996                            "failed to handle the inflate balloon event when freeing memory ranges \
997                             dynamically",
998                        ) {
999                        Ok(_) => {}
1000                        Err(e) => {
1001                            error!("{:?}", e);
1002                            r = VmMemoryResponse::Err(e.into());
1003                            break;
1004                        }
1005                    }
1006                }
1007                r
1008            }
1009            DynamicallyReclaimMemoryRanges { ranges } => {
1010                let mut r = VmMemoryResponse::Ok;
1011                for (guest_address, size) in ranges {
1012                    match vm
1013                        .handle_balloon_event(BalloonEvent::Deflate(MemRegion {
1014                            guest_address,
1015                            size,
1016                        }))
1017                        .context(
1018                            "failed to handle the deflate balloon event when reclaiming memory \
1019                             ranges dynamically",
1020                        ) {
1021                        Ok(_) => {}
1022                        Err(e) => {
1023                            error!("{:?}", e);
1024                            r = VmMemoryResponse::Err(e.into());
1025                            break;
1026                        }
1027                    }
1028                }
1029                r
1030            }
1031            BalloonTargetReached { size } => {
1032                match vm
1033                    .handle_balloon_event(BalloonEvent::BalloonTargetReached(size))
1034                    .context("failed to handle the target reached balloon event")
1035                {
1036                    Ok(_) => VmMemoryResponse::Ok,
1037                    Err(e) => VmMemoryResponse::Err(e.into()),
1038                }
1039            }
1040            IoEventRaw(request) => {
1041                let res = if request.register {
1042                    vm.register_ioevent(
1043                        request.event,
1044                        IoEventAddress::Mmio(request.addr),
1045                        request.datamatch,
1046                    )
1047                    .context("failed to register IO event")
1048                } else {
1049                    vm.unregister_ioevent(
1050                        request.event,
1051                        IoEventAddress::Mmio(request.addr),
1052                        request.datamatch,
1053                    )
1054                    .context("failed to unregister IO event")
1055                };
1056                match res {
1057                    Ok(_) => VmMemoryResponse::Ok,
1058                    Err(e) => VmMemoryResponse::Err(e.into()),
1059                }
1060            }
1061        }
1062    }
1063}
1064
#[derive(Serialize, Deserialize, Debug, PartialOrd, PartialEq, Eq, Ord, Clone, Copy)]
/// Identifier for registered memory regions. Globally unique.
///
/// Registered regions are recorded and later looked up for unregistration under this id.
// The current implementation uses guest physical address as the unique identifier.
pub struct VmMemoryRegionId(pub GuestAddress);
1069
/// Response produced when a VM memory request has been executed.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmMemoryResponse {
    /// The request to register memory into guest address space was successful.
    RegisterMemory {
        /// Identifier under which the region was recorded; used to unregister it later.
        region_id: VmMemoryRegionId,
        /// Memory slot the VM reported when the region was added.
        slot: u32,
    },
    /// The request succeeded and has nothing further to report.
    Ok,
    /// The request failed; carries the error together with its context chain.
    Err(VmMemoryResponseError),
}
1080
1081impl<T> From<Result<T>> for VmMemoryResponse {
1082    fn from(r: Result<T>) -> Self {
1083        match r {
1084            Ok(_) => VmMemoryResponse::Ok,
1085            Err(e) => VmMemoryResponse::Err(anyhow::Error::new(e).into()),
1086        }
1087    }
1088}
1089
/// Error payload of `VmMemoryResponse::Err`, wrapping an `anyhow::Error`.
///
/// It is serialized as the list of messages found in the error chain and rebuilt on
/// deserialization as an error carrying those messages as nested contexts.
#[derive(Debug, thiserror::Error)]
#[error("Vm memory response error: {0}")]
pub struct VmMemoryResponseError(#[from] pub anyhow::Error);
1093
1094impl TryFrom<FlatVmMemoryResponseError> for VmMemoryResponseError {
1095    type Error = anyhow::Error;
1096    fn try_from(value: FlatVmMemoryResponseError) -> StdResult<Self, Self::Error> {
1097        let inner = value
1098            .0
1099            .into_iter()
1100            .fold(
1101                None,
1102                |error: Option<anyhow::Error>, current_context| match error {
1103                    Some(error) => Some(error.context(current_context)),
1104                    None => Some(anyhow::Error::msg(current_context)),
1105                },
1106            )
1107            .context("should carry at least one error")?;
1108        Ok(Self(inner))
1109    }
1110}
1111
1112impl Serialize for VmMemoryResponseError {
1113    fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error>
1114    where
1115        S: serde::Serializer,
1116    {
1117        let flat: FlatVmMemoryResponseError = self.into();
1118        flat.serialize(serializer)
1119    }
1120}
1121
1122impl<'de> Deserialize<'de> for VmMemoryResponseError {
1123    fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
1124    where
1125        D: serde::Deserializer<'de>,
1126    {
1127        let flat = FlatVmMemoryResponseError::deserialize(deserializer)?;
1128        flat.try_into()
1129            .map_err(|e: anyhow::Error| D::Error::custom(e.to_string()))
1130    }
1131}
1132
/// Serializable form of `VmMemoryResponseError`: the messages of the error chain as plain
/// strings. The first entry is the one the error is rebuilt from; later entries are applied
/// on top of it as contexts.
#[derive(Debug, Serialize, Deserialize)]
struct FlatVmMemoryResponseError(Vec<String>);
1135
1136impl From<&VmMemoryResponseError> for FlatVmMemoryResponseError {
1137    fn from(value: &VmMemoryResponseError) -> Self {
1138        let contexts = value
1139            .0
1140            .chain()
1141            .map(ToString::to_string)
1142            .rev()
1143            .collect::<Vec<_>>();
1144        Self(contexts)
1145    }
1146}
1147
/// Requests for allocating, routing and releasing interrupts; carried out by
/// `VmIrqRequest::execute`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqRequest {
    /// Allocate one gsi, and associate gsi to irqfd with register_irqfd()
    ///
    /// On success the allocated gsi is reported back in `VmIrqResponse::AllocateOneMsi`.
    AllocateOneMsi {
        irqfd: Event,
        device_id: DeviceId,
        queue_id: usize,
        device_name: String,
    },
    /// Allocate a specific gsi to irqfd with register_irqfd(). This must only
    /// be used when it is known that the gsi is free. Only the snapshot
    /// subsystem can make this guarantee, and use of this request by any other
    /// caller is strongly discouraged.
    AllocateOneMsiAtGsi {
        irqfd: Event,
        gsi: u32,
        device_id: DeviceId,
        queue_id: usize,
        device_name: String,
    },
    /// Add one msi route entry into the IRQ chip.
    AddMsiRoute {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
        // Only part of the request on aarch64 builds.
        #[cfg(target_arch = "aarch64")]
        pci_address: resources::PciAddress,
    },
    /// Unregister the irqfd associated with `gsi` and release the gsi back to the allocator.
    ReleaseOneIrq {
        gsi: u32,
        irqfd: Event,
    },
}
1182
/// Data to set up an IRQ event or IRQ route on the IRQ chip.
/// VmIrqRequest::execute can't take an `IrqChip` argument, because of a dependency cycle between
/// devices and vm_control, so it takes a Fn that processes an `IrqSetup`.
pub enum IrqSetup<'a> {
    /// Associate an event with a gsi. Fields, in order: gsi, event, device id, queue id and
    /// device name.
    Event(u32, &'a Event, DeviceId, usize, String),
    /// Add the given route to the IRQ chip.
    Route(IrqRoute),
    /// Undo the association between the given gsi and event.
    UnRegister(u32, &'a Event),
}
1191
1192impl VmIrqRequest {
1193    /// Executes this request on the given Vm.
1194    ///
1195    /// # Arguments
1196    /// * `set_up_irq` - A function that applies an `IrqSetup` to an IRQ chip.
1197    ///
1198    /// This does not return a result, instead encapsulating the success or failure in a
1199    /// `VmIrqResponse` with the intended purpose of sending the response back over the socket
1200    /// that received this `VmIrqResponse`.
1201    pub fn execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse
1202    where
1203        F: FnOnce(IrqSetup) -> Result<()>,
1204    {
1205        use self::VmIrqRequest::*;
1206        match *self {
1207            AllocateOneMsi {
1208                ref irqfd,
1209                device_id,
1210                queue_id,
1211                ref device_name,
1212            } => {
1213                if let Some(irq_num) = sys_allocator.allocate_irq() {
1214                    match set_up_irq(IrqSetup::Event(
1215                        irq_num,
1216                        irqfd,
1217                        device_id,
1218                        queue_id,
1219                        device_name.clone(),
1220                    )) {
1221                        Ok(_) => VmIrqResponse::AllocateOneMsi { gsi: irq_num },
1222                        Err(e) => VmIrqResponse::Err(e),
1223                    }
1224                } else {
1225                    VmIrqResponse::Err(SysError::new(EINVAL))
1226                }
1227            }
1228            AllocateOneMsiAtGsi {
1229                ref irqfd,
1230                gsi,
1231                device_id,
1232                queue_id,
1233                ref device_name,
1234            } => {
1235                match set_up_irq(IrqSetup::Event(
1236                    gsi,
1237                    irqfd,
1238                    device_id,
1239                    queue_id,
1240                    device_name.clone(),
1241                )) {
1242                    Ok(_) => VmIrqResponse::Ok,
1243                    Err(e) => VmIrqResponse::Err(e),
1244                }
1245            }
1246            AddMsiRoute {
1247                gsi,
1248                msi_address,
1249                msi_data,
1250                #[cfg(target_arch = "aarch64")]
1251                pci_address,
1252            } => {
1253                let route = IrqRoute {
1254                    gsi,
1255                    source: IrqSource::Msi {
1256                        address: msi_address,
1257                        data: msi_data,
1258                        #[cfg(target_arch = "aarch64")]
1259                        pci_address,
1260                    },
1261                };
1262                match set_up_irq(IrqSetup::Route(route)) {
1263                    Ok(_) => VmIrqResponse::Ok,
1264                    Err(e) => VmIrqResponse::Err(e),
1265                }
1266            }
1267            ReleaseOneIrq { gsi, ref irqfd } => {
1268                let _ = set_up_irq(IrqSetup::UnRegister(gsi, irqfd));
1269                sys_allocator.release_irq(gsi);
1270                VmIrqResponse::Ok
1271            }
1272        }
1273    }
1274}
1275
/// Response to a `VmIrqRequest`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqResponse {
    /// An interrupt was allocated; `gsi` is the number that was assigned.
    AllocateOneMsi { gsi: u32 },
    /// The request succeeded and has nothing further to report.
    Ok,
    /// The request failed with the given system error.
    Err(SysError),
}
1282
/// State requested for devices.
// NOTE(review): presumably `Sleep` suspends devices and `Wake` resumes them; the users of
// this enum are outside this part of the file, confirm there.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum DevicesState {
    Sleep,
    Wake,
}
1288
/// Outcome of a battery control operation. Also used as the error type when parsing battery
/// types, properties, statuses and health values from strings.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum BatControlResult {
    /// The battery property was set successfully.
    Ok,
    /// No battery device has been created.
    NoBatDevice,
    /// The given health value is not a recognized name.
    NoSuchHealth,
    /// The given property is not a recognized name.
    NoSuchProperty,
    /// The given status value is not a recognized name.
    NoSuchStatus,
    /// The given battery type is not a recognized name.
    NoSuchBatType,
    /// The target value could not be parsed as an integer.
    StringParseIntErr,
    /// The target value could not be parsed as a boolean.
    StringParseBoolErr,
}
1300
1301impl Display for BatControlResult {
1302    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1303        use self::BatControlResult::*;
1304
1305        match self {
1306            Ok => write!(f, "Setting battery property successfully"),
1307            NoBatDevice => write!(f, "No battery device created"),
1308            NoSuchHealth => write!(f, "Invalid Battery health setting. Only support: unknown/good/overheat/dead/overvoltage/unexpectedfailure/cold/watchdogtimerexpire/safetytimerexpire/overcurrent"),
1309            NoSuchProperty => write!(f, "Battery doesn't have such property. Only support: status/health/present/capacity/aconline"),
1310            NoSuchStatus => write!(f, "Invalid Battery status setting. Only support: unknown/charging/discharging/notcharging/full"),
1311            NoSuchBatType => write!(f, "Invalid Battery type setting. Only support: goldfish"),
1312            StringParseIntErr => write!(f, "Battery property target ParseInt error"),
1313            StringParseBoolErr => write!(f, "Battery property target ParseBool error"),
1314        }
1315    }
1316}
1317
/// Kind of battery device. `Goldfish` is the only variant and also the default.
///
/// Variant names are serialized in kebab-case (`goldfish`).
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum BatteryType {
    #[default]
    Goldfish,
}
1324
1325impl FromStr for BatteryType {
1326    type Err = BatControlResult;
1327
1328    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1329        match s {
1330            "goldfish" => Ok(BatteryType::Goldfish),
1331            _ => Err(BatControlResult::NoSuchBatType),
1332        }
1333    }
1334}
1335
/// Battery property, or fake-configuration action, that a control command refers to.
///
/// Each variant is parsed from and displayed as a lowercase name; the name is noted on the
/// variant.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatProperty {
    /// `status`
    Status,
    /// `health`
    Health,
    /// `present`
    Present,
    /// `capacity`
    Capacity,
    /// `aconline`
    ACOnline,
    /// `set_fake_bat_config`
    SetFakeBatConfig,
    /// `cancel_fake_bat_config`
    CancelFakeBatConfig,
}
1346
1347impl FromStr for BatProperty {
1348    type Err = BatControlResult;
1349
1350    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1351        match s {
1352            "status" => Ok(BatProperty::Status),
1353            "health" => Ok(BatProperty::Health),
1354            "present" => Ok(BatProperty::Present),
1355            "capacity" => Ok(BatProperty::Capacity),
1356            "aconline" => Ok(BatProperty::ACOnline),
1357            "set_fake_bat_config" => Ok(BatProperty::SetFakeBatConfig),
1358            "cancel_fake_bat_config" => Ok(BatProperty::CancelFakeBatConfig),
1359            _ => Err(BatControlResult::NoSuchProperty),
1360        }
1361    }
1362}
1363
1364impl Display for BatProperty {
1365    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1366        match *self {
1367            BatProperty::Status => write!(f, "status"),
1368            BatProperty::Health => write!(f, "health"),
1369            BatProperty::Present => write!(f, "present"),
1370            BatProperty::Capacity => write!(f, "capacity"),
1371            BatProperty::ACOnline => write!(f, "aconline"),
1372            BatProperty::SetFakeBatConfig => write!(f, "set_fake_bat_config"),
1373            BatProperty::CancelFakeBatConfig => write!(f, "cancel_fake_bat_config"),
1374        }
1375    }
1376}
1377
/// Battery charging status.
///
/// The variants are converted to `u32` with an `as` cast, so their declaration order
/// determines the numeric value (`Unknown` is 0, `Full` is 4).
#[derive(Serialize, Deserialize, Debug)]
pub enum BatStatus {
    Unknown,
    Charging,
    DisCharging,
    NotCharging,
    Full,
}
1386
1387impl BatStatus {
1388    pub fn new(status: String) -> std::result::Result<Self, BatControlResult> {
1389        match status.as_str() {
1390            "unknown" => Ok(BatStatus::Unknown),
1391            "charging" => Ok(BatStatus::Charging),
1392            "discharging" => Ok(BatStatus::DisCharging),
1393            "notcharging" => Ok(BatStatus::NotCharging),
1394            "full" => Ok(BatStatus::Full),
1395            _ => Err(BatControlResult::NoSuchStatus),
1396        }
1397    }
1398}
1399
1400impl FromStr for BatStatus {
1401    type Err = BatControlResult;
1402
1403    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1404        match s {
1405            "unknown" => Ok(BatStatus::Unknown),
1406            "charging" => Ok(BatStatus::Charging),
1407            "discharging" => Ok(BatStatus::DisCharging),
1408            "notcharging" => Ok(BatStatus::NotCharging),
1409            "full" => Ok(BatStatus::Full),
1410            _ => Err(BatControlResult::NoSuchStatus),
1411        }
1412    }
1413}
1414
1415impl From<BatStatus> for u32 {
1416    fn from(status: BatStatus) -> Self {
1417        status as u32
1418    }
1419}
1420
/// Battery health state.
///
/// The variants are converted to `u32` with an `as` cast, so their declaration order
/// determines the numeric value (`Unknown` is 0, `OverCurrent` is 9).
#[derive(Serialize, Deserialize, Debug)]
pub enum BatHealth {
    Unknown,
    Good,
    Overheat,
    Dead,
    OverVoltage,
    UnexpectedFailure,
    Cold,
    WatchdogTimerExpire,
    SafetyTimerExpire,
    OverCurrent,
}
1434
1435impl FromStr for BatHealth {
1436    type Err = BatControlResult;
1437
1438    fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1439        match s {
1440            "unknown" => Ok(BatHealth::Unknown),
1441            "good" => Ok(BatHealth::Good),
1442            "overheat" => Ok(BatHealth::Overheat),
1443            "dead" => Ok(BatHealth::Dead),
1444            "overvoltage" => Ok(BatHealth::OverVoltage),
1445            "unexpectedfailure" => Ok(BatHealth::UnexpectedFailure),
1446            "cold" => Ok(BatHealth::Cold),
1447            "watchdogtimerexpire" => Ok(BatHealth::WatchdogTimerExpire),
1448            "safetytimerexpire" => Ok(BatHealth::SafetyTimerExpire),
1449            "overcurrent" => Ok(BatHealth::OverCurrent),
1450            _ => Err(BatControlResult::NoSuchHealth),
1451        }
1452    }
1453}
1454
1455impl From<BatHealth> for u32 {
1456    fn from(status: BatHealth) -> Self {
1457        status as u32
1458    }
1459}
1460
/// Command for changing a battery property; built from a property name and a textual value
/// by `BatControlCommand::new`.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatControlCommand {
    /// Set the charging status.
    SetStatus(BatStatus),
    /// Set the health state.
    SetHealth(BatHealth),
    /// Set the `present` property to the given number.
    SetPresent(u32),
    /// Set the `capacity` property to the given number.
    SetCapacity(u32),
    /// Set the `aconline` property to the given number.
    SetACOnline(u32),
    /// Apply a fake battery configuration with the given number.
    // NOTE(review): the meaning of the number is not visible here; confirm against the
    // battery device.
    SetFakeBatConfig(u32),
    /// Cancel a fake battery configuration.
    CancelFakeConfig,
}
1471
1472impl BatControlCommand {
1473    pub fn new(property: String, target: String) -> std::result::Result<Self, BatControlResult> {
1474        let cmd = property.parse::<BatProperty>()?;
1475        match cmd {
1476            BatProperty::Status => Ok(BatControlCommand::SetStatus(target.parse::<BatStatus>()?)),
1477            BatProperty::Health => Ok(BatControlCommand::SetHealth(target.parse::<BatHealth>()?)),
1478            BatProperty::Present => Ok(BatControlCommand::SetPresent(
1479                target
1480                    .parse::<u32>()
1481                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1482            )),
1483            BatProperty::Capacity => Ok(BatControlCommand::SetCapacity(
1484                target
1485                    .parse::<u32>()
1486                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1487            )),
1488            BatProperty::ACOnline => Ok(BatControlCommand::SetACOnline(
1489                target
1490                    .parse::<u32>()
1491                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1492            )),
1493            BatProperty::SetFakeBatConfig => Ok(BatControlCommand::SetFakeBatConfig(
1494                target
1495                    .parse::<u32>()
1496                    .map_err(|_| BatControlResult::StringParseIntErr)?,
1497            )),
1498            BatProperty::CancelFakeBatConfig => Ok(BatControlCommand::CancelFakeConfig),
1499        }
1500    }
1501}
1502
/// Used for VM to control battery properties.
pub struct BatControl {
    /// Type of the battery being controlled.
    pub type_: BatteryType,
    /// Tube used for battery control messages.
    // NOTE(review): presumably connected to the battery device; the peer is not visible in
    // this part of the file.
    pub control_tube: Tube,
}
1508
/// Commands used by the VM to control virtio-snd devices.
#[derive(Serialize, Deserialize, Debug)]
pub enum SndControlCommand {
    /// Mute control applying to all devices.
    // NOTE(review): presumably `true` mutes and `false` unmutes; confirm against the handler.
    MuteAll(bool),
}
1514
/// Used to mark the device type of a hot-plugged PCI device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum HotPlugDeviceType {
    UpstreamPort,
    DownstreamPort,
    EndPoint,
}
1522
/// Description of a PCI device used by the VM for hotplug requests.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HotPlugDeviceInfo {
    /// Kind of device being hot-plugged.
    pub device_type: HotPlugDeviceType,
    /// Path identifying the device.
    // NOTE(review): presumably the host path of the device; confirm with callers.
    pub path: PathBuf,
    /// Hotplug interrupt flag.
    // NOTE(review): presumably whether a hotplug interrupt should be raised; confirm.
    pub hp_interrupt: bool,
}
1530
/// Message for communicating a suspend or resume to the virtio-pvclock device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum PvClockCommand {
    /// Notify the device of a suspend.
    Suspend,
    /// Notify the device of a resume.
    Resume,
}
1537
/// Message used by virtio-pvclock to communicate command results.
#[derive(Serialize, Deserialize, Debug)]
pub enum PvClockCommandResponse {
    /// The command succeeded.
    Ok,
    /// Result of a resume, carrying a tick count.
    // NOTE(review): presumably the total number of ticks spent suspended; confirm against
    // the pvclock device.
    Resumed { total_suspended_ticks: u64 },
    /// The device is not active.
    DeviceInactive,
    /// The command failed with the given system error.
    Err(SysError),
}
1546
/// Commands for vmm-swap feature
// NOTE(review): the handling of each command lives outside this part of the file; see the
// swap controller for the exact semantics of each variant and of `slow_file_cleanup`.
#[derive(Serialize, Deserialize, Debug)]
pub enum SwapCommand {
    Enable,
    Trim,
    SwapOut,
    Disable { slow_file_cleanup: bool },
    Status,
}
1556
/// A request to the main process to perform some operation on the VM.
///
/// Unless otherwise noted, each request should expect a `VmResponse::Ok` to be received on success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmRequest {
    /// Break the VM's run loop and exit.
    Exit,
    /// Trigger a power button event in the guest.
    Powerbtn,
    /// Trigger a sleep button event in the guest.
    Sleepbtn,
    /// Trigger a RTC interrupt in the guest. When the irq associated with the RTC is
    /// resampled, it will be re-asserted as long as `clear_evt` is not signaled.
    Rtc { clear_evt: Event },
    /// Suspend the VM's VCPUs until resume.
    SuspendVcpus,
    /// Swap the memory content into files on a disk.
    Swap(SwapCommand),
    /// Resume the VM's VCPUs that were previously suspended.
    ResumeVcpus,
    /// Inject a general-purpose event. If `clear_evt` is provided, when the irq associated
    /// with the GPE is resampled, it will be re-asserted as long as `clear_evt` is not
    /// signaled.
    Gpe { gpe: u32, clear_evt: Option<Event> },
    /// Inject a PCI PME.
    PciPme(u16),
    /// Make the VM's RT VCPU real-time.
    MakeRT,
    /// Command for balloon driver.
    #[cfg(feature = "balloon")]
    BalloonCommand(BalloonControlCommand),
    /// Send a command to a disk chosen by `disk_index`.
    /// `disk_index` is a 0-based count of `--disk`, `--rwdisk`, and `-r` command-line options.
    DiskCommand {
        disk_index: usize,
        command: DiskControlCommand,
    },
    /// Command to use the USB controller.
    UsbCommand(UsbControlCommand),
    /// Command to modify the gpu.
    GpuCommand(GpuControlCommand),
    /// Command to set battery.
    BatCommand(BatteryType, BatControlCommand),
    /// Command to control snd devices.
    #[cfg(feature = "audio")]
    SndCommand(SndControlCommand),
    /// Command to add/remove multiple vfio-pci devices.
    // NOTE(review): presumably `add == true` adds the device and `false` removes it; confirm.
    HotPlugVfioCommand {
        device: HotPlugDeviceInfo,
        add: bool,
    },
    /// Command to add/remove network tap device as virtio-pci device.
    #[cfg(feature = "pci-hotplug")]
    HotPlugNetCommand(NetControlCommand),
    /// Command to snapshot devices.
    Snapshot(SnapshotCommand),
    /// Register for notifications of `event`.
    RegisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister from notifications of `event`.
    UnregisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister from all event notifications.
    Unregister { socket_addr: String },
    /// Suspend VM VCPUs and Devices until resume.
    SuspendVm,
    /// Resume VM VCPUs and Devices.
    ResumeVm,
    /// Returns Vcpus PID/TID.
    VcpuPidTid,
    /// Throttles the requested vCPU for microseconds.
    // NOTE(review): presumably the fields are (vCPU index, duration in microseconds); confirm.
    Throttle(usize, u32),
    /// Returns unique descriptor of this VM.
    GetVmDescriptor,
    /// Registers memory in guest.
    // NOTE(review): presumably `offset` is the offset into `fd` and `range_start..range_end`
    // bounds the guest range to place it in; confirm against the handler.
    RegisterMemory {
        fd: SafeDescriptor,
        offset: u64,
        range_start: u64,
        range_end: u64,
        cache_coherent: bool,
    },
    /// Unregisters memory in guest.
    UnregisterMemory { region_id: u64 },
}
1647
/// Kinds of events that a listener can register for.
///
/// NOTE: when making any changes to this enum please also update
/// RegisteredEventFfi in crosvm_control/src/lib.rs
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum RegisteredEvent {
    VirtioBalloonWsReport,
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1656
/// A `RegisteredEvent` together with the data reported for it. Each variant corresponds to
/// the `RegisteredEvent` variant of the same name.
#[derive(Serialize, Deserialize, Debug)]
pub enum RegisteredEventWithData {
    /// Working-set report from the balloon.
    VirtioBalloonWsReport {
        /// Working-set buckets of the report.
        ws_buckets: Vec<balloon_control::WSBucket>,
        /// Balloon size value reported alongside the buckets.
        // NOTE(review): presumably the actual balloon size in bytes; confirm units.
        balloon_actual: u64,
    },
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1666
1667impl RegisteredEventWithData {
1668    pub fn into_event(&self) -> RegisteredEvent {
1669        match self {
1670            Self::VirtioBalloonWsReport { .. } => RegisteredEvent::VirtioBalloonWsReport,
1671            Self::VirtioBalloonResize => RegisteredEvent::VirtioBalloonResize,
1672            Self::VirtioBalloonOOMDeflation => RegisteredEvent::VirtioBalloonOOMDeflation,
1673        }
1674    }
1675
1676    #[cfg(feature = "registered_events")]
1677    pub fn into_proto(&self) -> registered_events::RegisteredEvent {
1678        match self {
1679            Self::VirtioBalloonWsReport {
1680                ws_buckets,
1681                balloon_actual,
1682            } => {
1683                let mut report = registered_events::VirtioBalloonWsReport {
1684                    balloon_actual: *balloon_actual,
1685                    ..registered_events::VirtioBalloonWsReport::new()
1686                };
1687                for ws in ws_buckets {
1688                    report.ws_buckets.push(registered_events::VirtioWsBucket {
1689                        age: ws.age,
1690                        file_bytes: ws.bytes[0],
1691                        anon_bytes: ws.bytes[1],
1692                        ..registered_events::VirtioWsBucket::new()
1693                    });
1694                }
1695                let mut event = registered_events::RegisteredEvent::new();
1696                event.set_ws_report(report);
1697                event
1698            }
1699            Self::VirtioBalloonResize => {
1700                let mut event = registered_events::RegisteredEvent::new();
1701                event.set_resize(registered_events::VirtioBalloonResize::new());
1702                event
1703            }
1704            Self::VirtioBalloonOOMDeflation => {
1705                let mut event = registered_events::RegisteredEvent::new();
1706                event.set_oom_deflation(registered_events::VirtioBalloonOOMDeflation::new());
1707                event
1708            }
1709        }
1710    }
1711
1712    pub fn from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self {
1713        RegisteredEventWithData::VirtioBalloonWsReport {
1714            ws_buckets: ws.ws.clone(),
1715            balloon_actual,
1716        }
1717    }
1718}
1719
1720pub fn handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse {
1721    // Forward the request to the block device process via its control socket.
1722    if let Err(e) = disk_host_tube.send(command) {
1723        error!("disk socket send failed: {}", e);
1724        return VmResponse::Err(SysError::new(EINVAL));
1725    }
1726
1727    // Wait for the disk control command to be processed
1728    match disk_host_tube.recv() {
1729        Ok(DiskControlResult::Ok) => VmResponse::Ok,
1730        Ok(DiskControlResult::Err(e)) => VmResponse::Err(e),
1731        Err(e) => {
1732            error!("disk socket recv failed: {}", e);
1733            VmResponse::Err(SysError::new(EINVAL))
1734        }
1735    }
1736}
1737
1738/// WARNING: descriptor must be a mapping handle on Windows.
1739fn map_descriptor(
1740    descriptor: &dyn AsRawDescriptor,
1741    offset: u64,
1742    size: u64,
1743    prot: Protection,
1744) -> Result<Box<dyn MappedRegion>> {
1745    let size: usize = size.try_into().map_err(|_e| SysError::new(ERANGE))?;
1746    match MemoryMappingBuilder::new(size)
1747        .from_descriptor(descriptor)
1748        .offset(offset)
1749        .protection(prot)
1750        .build()
1751    {
1752        Ok(mmap) => Ok(Box::new(mmap)),
1753        Err(MmapError::SystemCallFailed(e)) => Err(e),
1754        _ => Err(SysError::new(EINVAL)),
1755    }
1756}
1757
1758// Get vCPU state. vCPUs are expected to all hold the same state.
1759// In this function, there may be a time where vCPUs are not holding the same state
1760// as they transition from one state to the other. This is expected, and the final result
1761// should be all vCPUs holding the same state.
1762fn get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode> {
1763    let (send_chan, recv_chan) = mpsc::channel();
1764    kick_vcpus(VcpuControl::GetStates(send_chan));
1765    if vcpu_num == 0 {
1766        bail!("vcpu_num is zero");
1767    }
1768    let mut current_mode_vec: Vec<VmRunMode> = Vec::new();
1769    for _ in 0..vcpu_num {
1770        match recv_chan.recv() {
1771            Ok(state) => current_mode_vec.push(state),
1772            Err(e) => {
1773                bail!("Failed to get vCPU state: {}", e);
1774            }
1775        };
1776    }
1777    let first_state = current_mode_vec[0];
1778    if first_state == VmRunMode::Exiting {
1779        panic!("Attempt to snapshot while exiting.");
1780    }
1781    if current_mode_vec.iter().any(|x| *x != first_state) {
1782        // We do not panic here. It could be that vCPUs are transitioning from one mode to another.
1783        bail!("Unknown VM state: vCPUs hold different states.");
1784    }
1785    Ok(first_state)
1786}
1787
/// A guard to guarantee that all the vCPUs are suspended during the scope.
///
/// When this guard is dropped, it rolls back the state of CPUs.
pub struct VcpuSuspendGuard<'a> {
    /// Run mode the vCPUs reported when the guard was created. It is sent back to the vCPUs on
    /// drop unless it is `VmRunMode::Suspending`.
    saved_run_mode: VmRunMode,
    /// Callback used to deliver a [VcpuControl] message to the vCPUs.
    kick_vcpus: &'a dyn Fn(VcpuControl),
}
1795
1796impl<'a> VcpuSuspendGuard<'a> {
1797    /// Check the all vCPU state and suspend the vCPUs if they are running.
1798    ///
1799    /// This returns [VcpuSuspendGuard] to rollback the vcpu state.
1800    ///
1801    /// # Arguments
1802    ///
1803    /// * `kick_vcpus` - A funtion to send [VcpuControl] message to all the vCPUs and interrupt
1804    ///   them.
1805    /// * `vcpu_num` - The number of vCPUs.
1806    pub fn new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self> {
1807        // get initial vcpu state
1808        let saved_run_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1809        match saved_run_mode {
1810            VmRunMode::Running => {
1811                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1812                // Blocking call, waiting for response to ensure vCPU state was updated.
1813                // In case of failure, where a vCPU still has the state running, start up vcpus and
1814                // abort operation.
1815                let current_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1816                if current_mode != VmRunMode::Suspending {
1817                    kick_vcpus(VcpuControl::RunState(saved_run_mode));
1818                    bail!("vCPUs failed to all suspend. Kicking back all vCPUs to their previous state: {saved_run_mode}");
1819                }
1820            }
1821            VmRunMode::Suspending => {
1822                // do nothing. keep the state suspending.
1823            }
1824            other => {
1825                bail!("vcpus are not in running/suspending state, but {}", other);
1826            }
1827        };
1828        Ok(Self {
1829            saved_run_mode,
1830            kick_vcpus,
1831        })
1832    }
1833}
1834
1835impl Drop for VcpuSuspendGuard<'_> {
1836    fn drop(&mut self) {
1837        if self.saved_run_mode != VmRunMode::Suspending {
1838            (self.kick_vcpus)(VcpuControl::RunState(self.saved_run_mode));
1839        }
1840    }
1841}
1842
/// A guard to guarantee that all devices are sleeping during its scope.
///
/// When this guard is dropped, it wakes the devices.
pub struct DeviceSleepGuard<'a> {
    /// Tube over which `DeviceControlCommand`s are sent and their `VmResponse`s are received.
    device_control_tube: &'a Tube,
    /// Devices state reported when the guard was created. The devices are only woken on drop
    /// if this is `DevicesState::Wake`.
    devices_state: DevicesState,
}
1850
1851impl<'a> DeviceSleepGuard<'a> {
1852    fn new(device_control_tube: &'a Tube) -> anyhow::Result<Self> {
1853        device_control_tube
1854            .send(&DeviceControlCommand::GetDevicesState)
1855            .context("send command to devices control socket")?;
1856        let devices_state = match device_control_tube
1857            .recv()
1858            .context("receive from devices control socket")?
1859        {
1860            VmResponse::DevicesState(state) => state,
1861            resp => bail!("failed to get devices state. Unexpected behavior: {}", resp),
1862        };
1863        if let DevicesState::Wake = devices_state {
1864            device_control_tube
1865                .send(&DeviceControlCommand::SleepDevices)
1866                .context("send command to devices control socket")?;
1867            match device_control_tube
1868                .recv()
1869                .context("receive from devices control socket")?
1870            {
1871                VmResponse::Ok => (),
1872                resp => bail!("device sleep failed: {}", resp),
1873            }
1874        }
1875        Ok(Self {
1876            device_control_tube,
1877            devices_state,
1878        })
1879    }
1880}
1881
1882impl Drop for DeviceSleepGuard<'_> {
1883    fn drop(&mut self) {
1884        if let DevicesState::Wake = self.devices_state {
1885            if let Err(e) = self
1886                .device_control_tube
1887                .send(&DeviceControlCommand::WakeDevices)
1888            {
1889                panic!("failed to request device wake after snapshot: {e}");
1890            }
1891            match self.device_control_tube.recv() {
1892                Ok(VmResponse::Ok) => (),
1893                Ok(resp) => panic!("unexpected response to device wake request: {resp}"),
1894                Err(e) => panic!("failed to get reply for device wake request: {e}"),
1895            }
1896        }
1897    }
1898}
1899
1900impl VmRequest {
1901    /// Executes this request on the given Vm and other mutable state.
1902    ///
1903    /// This does not return a result, instead encapsulating the success or failure in a
1904    /// `VmResponse` with the intended purpose of sending the response back over the  socket that
1905    /// received this `VmRequest`.
1906    ///
1907    /// `suspended_pvclock_state`: If the hypervisor has its own pvclock (not the same as
1908    /// virtio-pvclock) and the VM is suspended (not just the vCPUs, but the full VM), then
1909    /// `suspended_pvclock_state` will be used to store the ClockState saved just after the vCPUs
1910    /// were suspended. It is important that we save the value right after the vCPUs are suspended
1911    /// and restore it right before the vCPUs are resumed (instead of, more naturally, during the
1912    /// snapshot/restore steps) because the pvclock continues to tick even when the vCPUs are
1913    /// suspended.
1914    #[allow(unused_variables)]
1915    pub fn execute(
1916        &self,
1917        vm: &dyn Vm,
1918        disk_host_tubes: &[Tube],
1919        snd_host_tubes: &[Tube],
1920        pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
1921        gpu_control_tube: Option<&Tube>,
1922        usb_control_tube: Option<&Tube>,
1923        bat_control: &mut Option<BatControl>,
1924        kick_vcpus: impl Fn(VcpuControl),
1925        #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl),
1926        force_s2idle: bool,
1927        #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
1928        device_control_tube: &Tube,
1929        vcpu_size: usize,
1930        irq_handler_control: &Tube,
1931        snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
1932        suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
1933    ) -> VmResponse {
1934        match self {
1935            VmRequest::Exit => {
1936                panic!("VmRequest::Exit should be handled by the platform run loop");
1937            }
1938            VmRequest::Powerbtn => {
1939                if let Some(pm) = pm {
1940                    pm.lock().pwrbtn_evt();
1941                    VmResponse::Ok
1942                } else {
1943                    error!("{:#?} not supported", *self);
1944                    VmResponse::Err(SysError::new(ENOTSUP))
1945                }
1946            }
1947            VmRequest::Sleepbtn => {
1948                if let Some(pm) = pm {
1949                    pm.lock().slpbtn_evt();
1950                    VmResponse::Ok
1951                } else {
1952                    error!("{:#?} not supported", *self);
1953                    VmResponse::Err(SysError::new(ENOTSUP))
1954                }
1955            }
1956            VmRequest::Rtc { clear_evt } => {
1957                if let Some(pm) = pm.as_ref() {
1958                    match clear_evt.try_clone() {
1959                        Ok(clear_evt) => {
1960                            // RTC event will asynchronously trigger wakeup.
1961                            pm.lock().rtc_evt(clear_evt);
1962                            VmResponse::Ok
1963                        }
1964                        Err(err) => {
1965                            error!("Error cloning clear_evt: {:?}", err);
1966                            VmResponse::Err(SysError::new(EIO))
1967                        }
1968                    }
1969                } else {
1970                    error!("{:#?} not supported", *self);
1971                    VmResponse::Err(SysError::new(ENOTSUP))
1972                }
1973            }
1974            VmRequest::SuspendVcpus => {
1975                if !force_s2idle {
1976                    kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1977                    let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
1978                        Ok(state) => state,
1979                        Err(e) => {
1980                            error!("failed to get vcpu state: {e}");
1981                            return VmResponse::Err(SysError::new(EIO));
1982                        }
1983                    };
1984                    if current_mode != VmRunMode::Suspending {
1985                        error!("vCPUs failed to all suspend.");
1986                        return VmResponse::Err(SysError::new(EIO));
1987                    }
1988                }
1989                VmResponse::Ok
1990            }
1991            VmRequest::ResumeVcpus => {
1992                if let Err(e) = device_control_tube.send(&DeviceControlCommand::GetDevicesState) {
1993                    error!("failed to send GetDevicesState: {}", e);
1994                    return VmResponse::Err(SysError::new(EIO));
1995                }
1996                let devices_state = match device_control_tube.recv() {
1997                    Ok(VmResponse::DevicesState(state)) => state,
1998                    Ok(resp) => {
1999                        error!("failed to get devices state. Unexpected behavior: {}", resp);
2000                        return VmResponse::Err(SysError::new(EINVAL));
2001                    }
2002                    Err(e) => {
2003                        error!("failed to get devices state. Unexpected behavior: {}", e);
2004                        return VmResponse::Err(SysError::new(EINVAL));
2005                    }
2006                };
2007                if let DevicesState::Sleep = devices_state {
2008                    error!("Trying to wake Vcpus while Devices are asleep. Did you mean to use `crosvm resume --full`?");
2009                    return VmResponse::Err(SysError::new(EINVAL));
2010                }
2011
2012                if force_s2idle {
2013                    // During resume also emulate powerbtn event which will allow to wakeup fully
2014                    // suspended guest.
2015                    if let Some(pm) = pm {
2016                        pm.lock().pwrbtn_evt();
2017                    } else {
2018                        error!("triggering power btn during resume not supported");
2019                        return VmResponse::Err(SysError::new(ENOTSUP));
2020                    }
2021                }
2022
2023                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2024                VmResponse::Ok
2025            }
2026            VmRequest::Swap(SwapCommand::Enable) => {
2027                #[cfg(feature = "swap")]
2028                if let Some(swap_controller) = swap_controller {
2029                    // Suspend all vcpus and devices while vmm-swap is enabling (move the guest
2030                    // memory contents to the staging memory) to guarantee no processes other than
2031                    // the swap monitor process access the guest memory.
2032                    let _vcpu_guard = match VcpuSuspendGuard::new(&kick_vcpus, vcpu_size) {
2033                        Ok(guard) => guard,
2034                        Err(e) => {
2035                            error!("failed to suspend vcpus: {:?}", e);
2036                            return VmResponse::Err(SysError::new(EINVAL));
2037                        }
2038                    };
2039                    // TODO(b/253386409): Use `devices::Suspendable::sleep()` instead of sending
2040                    // `SIGSTOP` signal.
2041                    let _devices_guard = match swap_controller.suspend_devices() {
2042                        Ok(guard) => guard,
2043                        Err(e) => {
2044                            error!("failed to suspend devices: {:?}", e);
2045                            return VmResponse::Err(SysError::new(EINVAL));
2046                        }
2047                    };
2048
2049                    return match swap_controller.enable() {
2050                        Ok(()) => VmResponse::Ok,
2051                        Err(e) => {
2052                            error!("swap enable failed: {}", e);
2053                            VmResponse::Err(SysError::new(EINVAL))
2054                        }
2055                    };
2056                }
2057                VmResponse::Err(SysError::new(ENOTSUP))
2058            }
2059            VmRequest::Swap(SwapCommand::Trim) => {
2060                #[cfg(feature = "swap")]
2061                if let Some(swap_controller) = swap_controller {
2062                    return match swap_controller.trim() {
2063                        Ok(()) => VmResponse::Ok,
2064                        Err(e) => {
2065                            error!("swap trim failed: {}", e);
2066                            VmResponse::Err(SysError::new(EINVAL))
2067                        }
2068                    };
2069                }
2070                VmResponse::Err(SysError::new(ENOTSUP))
2071            }
2072            VmRequest::Swap(SwapCommand::SwapOut) => {
2073                #[cfg(feature = "swap")]
2074                if let Some(swap_controller) = swap_controller {
2075                    return match swap_controller.swap_out() {
2076                        Ok(()) => VmResponse::Ok,
2077                        Err(e) => {
2078                            error!("swap out failed: {}", e);
2079                            VmResponse::Err(SysError::new(EINVAL))
2080                        }
2081                    };
2082                }
2083                VmResponse::Err(SysError::new(ENOTSUP))
2084            }
2085            VmRequest::Swap(SwapCommand::Disable {
2086                #[cfg(feature = "swap")]
2087                slow_file_cleanup,
2088                ..
2089            }) => {
2090                #[cfg(feature = "swap")]
2091                if let Some(swap_controller) = swap_controller {
2092                    return match swap_controller.disable(*slow_file_cleanup) {
2093                        Ok(()) => VmResponse::Ok,
2094                        Err(e) => {
2095                            error!("swap disable failed: {}", e);
2096                            VmResponse::Err(SysError::new(EINVAL))
2097                        }
2098                    };
2099                }
2100                VmResponse::Err(SysError::new(ENOTSUP))
2101            }
2102            VmRequest::Swap(SwapCommand::Status) => {
2103                #[cfg(feature = "swap")]
2104                if let Some(swap_controller) = swap_controller {
2105                    return match swap_controller.status() {
2106                        Ok(status) => VmResponse::SwapStatus(status),
2107                        Err(e) => {
2108                            error!("swap status failed: {}", e);
2109                            VmResponse::Err(SysError::new(EINVAL))
2110                        }
2111                    };
2112                }
2113                VmResponse::Err(SysError::new(ENOTSUP))
2114            }
2115            VmRequest::SuspendVm => {
2116                info!("Starting crosvm suspend");
2117                kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
2118                let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
2119                    Ok(state) => state,
2120                    Err(e) => {
2121                        error!("failed to get vcpu state: {e}");
2122                        return VmResponse::Err(SysError::new(EIO));
2123                    }
2124                };
2125                if current_mode != VmRunMode::Suspending {
2126                    error!("vCPUs failed to all suspend.");
2127                    return VmResponse::Err(SysError::new(EIO));
2128                }
2129                // Snapshot the pvclock ASAP after stopping vCPUs.
2130                if vm.check_capability(VmCap::PvClock) {
2131                    if suspended_pvclock_state.is_none() {
2132                        *suspended_pvclock_state = Some(match vm.get_pvclock() {
2133                            Ok(x) => x,
2134                            Err(e) => {
2135                                error!("suspend_pvclock failed: {e:?}");
2136                                return VmResponse::Err(SysError::new(EIO));
2137                            }
2138                        });
2139                    }
2140                }
2141                if let Err(e) = device_control_tube
2142                    .send(&DeviceControlCommand::SleepDevices)
2143                    .context("send command to devices control socket")
2144                {
2145                    error!("{:?}", e);
2146                    return VmResponse::Err(SysError::new(EIO));
2147                };
2148                match device_control_tube
2149                    .recv()
2150                    .context("receive from devices control socket")
2151                {
2152                    Ok(VmResponse::Ok) => {
2153                        info!("Finished crosvm suspend successfully");
2154                        VmResponse::Ok
2155                    }
2156                    Ok(resp) => {
2157                        error!("device sleep failed: {}", resp);
2158                        VmResponse::Err(SysError::new(EIO))
2159                    }
2160                    Err(e) => {
2161                        error!("receive from devices control socket: {:?}", e);
2162                        VmResponse::Err(SysError::new(EIO))
2163                    }
2164                }
2165            }
2166            VmRequest::ResumeVm => {
2167                info!("Starting crosvm resume");
2168                if let Err(e) = device_control_tube
2169                    .send(&DeviceControlCommand::WakeDevices)
2170                    .context("send command to devices control socket")
2171                {
2172                    error!("{:?}", e);
2173                    return VmResponse::Err(SysError::new(EIO));
2174                };
2175                match device_control_tube
2176                    .recv()
2177                    .context("receive from devices control socket")
2178                {
2179                    Ok(VmResponse::Ok) => {
2180                        info!("Finished crosvm resume successfully");
2181                    }
2182                    Ok(resp) => {
2183                        error!("device wake failed: {}", resp);
2184                        return VmResponse::Err(SysError::new(EIO));
2185                    }
2186                    Err(e) => {
2187                        error!("receive from devices control socket: {:?}", e);
2188                        return VmResponse::Err(SysError::new(EIO));
2189                    }
2190                }
2191                // Resume the pvclock as late as possible before starting vCPUs.
2192                if vm.check_capability(VmCap::PvClock) {
2193                    // If None, then we aren't suspended, which is a valid case.
2194                    if let Some(x) = suspended_pvclock_state {
2195                        if let Err(e) = vm.set_pvclock(x) {
2196                            error!("resume_pvclock failed: {e:?}");
2197                            return VmResponse::Err(SysError::new(EIO));
2198                        }
2199                    }
2200                }
2201                kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2202                VmResponse::Ok
2203            }
2204            VmRequest::Gpe { gpe, clear_evt } => {
2205                if let Some(pm) = pm.as_ref() {
2206                    match clear_evt.as_ref().map(|e| e.try_clone()).transpose() {
2207                        Ok(clear_evt) => {
2208                            pm.lock().gpe_evt(*gpe, clear_evt);
2209                            VmResponse::Ok
2210                        }
2211                        Err(err) => {
2212                            error!("Error cloning clear_evt: {:?}", err);
2213                            VmResponse::Err(SysError::new(EIO))
2214                        }
2215                    }
2216                } else {
2217                    error!("{:#?} not supported", *self);
2218                    VmResponse::Err(SysError::new(ENOTSUP))
2219                }
2220            }
2221            VmRequest::PciPme(requester_id) => {
2222                if let Some(pm) = pm.as_ref() {
2223                    pm.lock().pme_evt(*requester_id);
2224                    VmResponse::Ok
2225                } else {
2226                    error!("{:#?} not supported", *self);
2227                    VmResponse::Err(SysError::new(ENOTSUP))
2228                }
2229            }
2230            VmRequest::MakeRT => {
2231                kick_vcpus(VcpuControl::MakeRT);
2232                VmResponse::Ok
2233            }
2234            #[cfg(feature = "balloon")]
2235            VmRequest::BalloonCommand(_) => unreachable!("Should be handled with BalloonTube"),
2236            VmRequest::DiskCommand {
2237                disk_index,
2238                ref command,
2239            } => match &disk_host_tubes.get(*disk_index) {
2240                Some(tube) => handle_disk_command(command, tube),
2241                None => VmResponse::Err(SysError::new(ENODEV)),
2242            },
2243            VmRequest::GpuCommand(ref cmd) => match gpu_control_tube {
2244                Some(gpu_control) => {
2245                    let res = gpu_control.send(cmd);
2246                    if let Err(e) = res {
2247                        error!("fail to send command to gpu control socket: {}", e);
2248                        return VmResponse::Err(SysError::new(EIO));
2249                    }
2250                    match gpu_control.recv() {
2251                        Ok(response) => VmResponse::GpuResponse(response),
2252                        Err(e) => {
2253                            error!("fail to recv command from gpu control socket: {}", e);
2254                            VmResponse::Err(SysError::new(EIO))
2255                        }
2256                    }
2257                }
2258                None => {
2259                    error!("gpu control is not enabled in crosvm");
2260                    VmResponse::Err(SysError::new(EIO))
2261                }
2262            },
2263            VmRequest::UsbCommand(ref cmd) => {
2264                let usb_control_tube = match usb_control_tube {
2265                    Some(t) => t,
2266                    None => {
2267                        error!("attempted to execute USB request without control tube");
2268                        return VmResponse::Err(SysError::new(ENODEV));
2269                    }
2270                };
2271                let res = usb_control_tube.send(cmd);
2272                if let Err(e) = res {
2273                    error!("fail to send command to usb control socket: {}", e);
2274                    return VmResponse::Err(SysError::new(EIO));
2275                }
2276                match usb_control_tube.recv() {
2277                    Ok(response) => VmResponse::UsbResponse(response),
2278                    Err(e) => {
2279                        error!("fail to recv command from usb control socket: {}", e);
2280                        VmResponse::Err(SysError::new(EIO))
2281                    }
2282                }
2283            }
2284            VmRequest::BatCommand(type_, ref cmd) => {
2285                match bat_control {
2286                    Some(battery) => {
2287                        if battery.type_ != *type_ {
2288                            error!("ignored battery command due to battery type: expected {:?}, got {:?}", battery.type_, type_);
2289                            return VmResponse::Err(SysError::new(EINVAL));
2290                        }
2291
2292                        let res = battery.control_tube.send(cmd);
2293                        if let Err(e) = res {
2294                            error!("fail to send command to bat control socket: {}", e);
2295                            return VmResponse::Err(SysError::new(EIO));
2296                        }
2297
2298                        match battery.control_tube.recv() {
2299                            Ok(response) => VmResponse::BatResponse(response),
2300                            Err(e) => {
2301                                error!("fail to recv command from bat control socket: {}", e);
2302                                VmResponse::Err(SysError::new(EIO))
2303                            }
2304                        }
2305                    }
2306                    None => VmResponse::BatResponse(BatControlResult::NoBatDevice),
2307                }
2308            }
2309            #[cfg(feature = "audio")]
2310            VmRequest::SndCommand(ref cmd) => match cmd {
2311                SndControlCommand::MuteAll(muted) => {
2312                    for tube in snd_host_tubes {
2313                        let res = tube.send(&SndControlCommand::MuteAll(*muted));
2314                        if let Err(e) = res {
2315                            error!("fail to send command to snd control socket: {}", e);
2316                            return VmResponse::Err(SysError::new(EIO));
2317                        }
2318
2319                        match tube.recv() {
2320                            Ok(VmResponse::Ok) => {
2321                                debug!("device is successfully muted");
2322                            }
2323                            Ok(resp) => {
2324                                error!("mute failed: {}", resp);
2325                                return VmResponse::ErrString("fail to mute the device".to_owned());
2326                            }
2327                            Err(e) => return VmResponse::Err(SysError::new(EIO)),
2328                        }
2329                    }
2330                    VmResponse::Ok
2331                }
2332            },
2333            VmRequest::HotPlugVfioCommand { device: _, add: _ } => VmResponse::Ok,
2334            #[cfg(feature = "pci-hotplug")]
2335            VmRequest::HotPlugNetCommand(ref _net_cmd) => {
2336                VmResponse::ErrString("hot plug not supported".to_owned())
2337            }
2338            VmRequest::Snapshot(SnapshotCommand::Take {
2339                ref snapshot_path,
2340                compress_memory,
2341                encrypt,
2342            }) => {
2343                info!("Starting crosvm snapshot");
2344                match do_snapshot(
2345                    snapshot_path.to_path_buf(),
2346                    kick_vcpus,
2347                    irq_handler_control,
2348                    device_control_tube,
2349                    vcpu_size,
2350                    snapshot_irqchip,
2351                    *compress_memory,
2352                    *encrypt,
2353                    suspended_pvclock_state,
2354                    vm,
2355                ) {
2356                    Ok(()) => {
2357                        info!("Finished crosvm snapshot successfully");
2358                        VmResponse::Ok
2359                    }
2360                    Err(e) => {
2361                        error!("failed to handle snapshot: {:?}", e);
2362                        VmResponse::Err(SysError::new(EIO))
2363                    }
2364                }
2365            }
2366            VmRequest::RegisterListener {
2367                socket_addr: _,
2368                event: _,
2369            } => VmResponse::Ok,
2370            VmRequest::UnregisterListener {
2371                socket_addr: _,
2372                event: _,
2373            } => VmResponse::Ok,
2374            VmRequest::Unregister { socket_addr: _ } => VmResponse::Ok,
2375            VmRequest::VcpuPidTid => unreachable!(),
2376            VmRequest::Throttle(_, _) => unreachable!(),
2377            VmRequest::GetVmDescriptor => {
2378                let vm_fd = match vm.try_clone_descriptor() {
2379                    Ok(vm_fd) => vm_fd,
2380                    Err(e) => {
2381                        error!("failed to get vm_fd: {:?}", e);
2382                        return VmResponse::Err(e);
2383                    }
2384                };
2385                VmResponse::VmDescriptor {
2386                    hypervisor: vm.hypervisor_kind(),
2387                    vm_fd,
2388                }
2389            }
2390            VmRequest::RegisterMemory { .. } => unreachable!(),
2391            VmRequest::UnregisterMemory { .. } => unreachable!(),
2392        }
2393    }
2394}
2395
2396/// Snapshot the VM to file at `snapshot_path`
2397fn do_snapshot(
2398    snapshot_path: PathBuf,
2399    kick_vcpus: impl Fn(VcpuControl),
2400    irq_handler_control: &Tube,
2401    device_control_tube: &Tube,
2402    vcpu_size: usize,
2403    snapshot_irqchip: impl Fn() -> anyhow::Result<AnySnapshot>,
2404    compress_memory: bool,
2405    encrypt: bool,
2406    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2407    vm: &dyn Vm,
2408) -> anyhow::Result<()> {
2409    let snapshot_start = Instant::now();
2410
2411    let _vcpu_guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size)?;
2412    let _device_guard = DeviceSleepGuard::new(device_control_tube)?;
2413
2414    // We want to flush all pending IRQs to the interrupt controller. There are two cases:
2415    //
2416    // MSIs: these are directly delivered to the interrupt controller.
2417    // We must verify the handler thread cycles once to deliver these interrupts.
2418    //
2419    // Legacy interrupts: in the case of a split IRQ chip, these interrupts may
2420    // flow through the userspace IOAPIC. If the hypervisor does not support
2421    // irqfds (e.g. WHPX), a single iteration will only flush the IRQ to the
2422    // IOAPIC. The underlying MSI will be asserted at this point, but if the
2423    // IRQ handler doesn't run another iteration, it won't be delivered to the
2424    // interrupt controller. This is why we cycle the handler thread twice (doing so
2425    // ensures we process the underlying MSI).
2426    //
2427    // We can handle both of these cases by iterating until there are no tokens
2428    // serviced on the requested iteration. Note that in the legacy case, this
2429    // ensures at least two iterations.
2430    //
2431    // Note: within CrosVM, *all* interrupts are eventually converted into the
2432    // same mechanicism that MSIs use. This is why we say "underlying" MSI for
2433    // a legacy IRQ.
2434    {
2435        let mut flush_attempts = 0;
2436        loop {
2437            irq_handler_control
2438                .send(&IrqHandlerRequest::WakeAndNotifyIteration)
2439                .context("failed to send flush command to IRQ handler thread")?;
2440            let resp = irq_handler_control
2441                .recv()
2442                .context("failed to recv flush response from IRQ handler thread")?;
2443            match resp {
2444                IrqHandlerResponse::HandlerIterationComplete(tokens_serviced) => {
2445                    if tokens_serviced == 0 {
2446                        break;
2447                    }
2448                }
2449                _ => bail!("received unexpected reply from IRQ handler: {:?}", resp),
2450            }
2451            flush_attempts += 1;
2452            if flush_attempts > EXPECTED_MAX_IRQ_FLUSH_ITERATIONS {
2453                warn!(
2454                    "flushing IRQs for snapshot may be stalled after iteration {}, expected <= {}
2455                      iterations",
2456                    flush_attempts, EXPECTED_MAX_IRQ_FLUSH_ITERATIONS
2457                );
2458            }
2459        }
2460        info!("flushed IRQs in {} iterations", flush_attempts);
2461    }
2462    let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
2463
2464    // Snapshot hypervisor's paravirtualized clock.
2465    snapshot_writer.write_fragment("pvclock", &AnySnapshot::to_any(suspended_pvclock_state)?)?;
2466
2467    // Snapshot Vcpus
2468    info!("VCPUs snapshotting...");
2469    let (send_chan, recv_chan) = mpsc::channel();
2470    kick_vcpus(VcpuControl::Snapshot(
2471        snapshot_writer.add_namespace("vcpu")?,
2472        send_chan,
2473    ));
2474    // Validate all Vcpus snapshot successfully
2475    for _ in 0..vcpu_size {
2476        recv_chan
2477            .recv()
2478            .context("Failed to recv Vcpu snapshot response")?
2479            .context("Failed to snapshot Vcpu")?;
2480    }
2481    info!("VCPUs snapshotted.");
2482
2483    // Snapshot irqchip
2484    info!("Snapshotting irqchip...");
2485    let irqchip_snap = snapshot_irqchip()?;
2486    snapshot_writer
2487        .write_fragment("irqchip", &irqchip_snap)
2488        .context("Failed to write irqchip state")?;
2489    info!("Snapshotted irqchip.");
2490
2491    // Snapshot memory
2492    {
2493        let mem_snap_start = Instant::now();
2494        // Use 64MB chunks when writing the memory snapshot (if encryption is used).
2495        const MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES: usize = 1024 * 1024 * 64;
2496        // SAFETY:
2497        // VM & devices are stopped.
2498        let guest_memory_metadata = unsafe {
2499            vm.get_memory()
2500                .snapshot(
2501                    &mut snapshot_writer.raw_fragment_with_chunk_size(
2502                        "mem",
2503                        MEMORY_SNAP_ENCRYPTED_CHUNK_SIZE_BYTES,
2504                    )?,
2505                    compress_memory,
2506                )
2507                .context("failed to snapshot memory")?
2508        };
2509        snapshot_writer.write_fragment("mem_metadata", &guest_memory_metadata)?;
2510
2511        let mem_snap_duration_ms = mem_snap_start.elapsed().as_millis();
2512        info!(
2513            "snapshot: memory snapshotted {}MB in {}ms",
2514            vm.get_memory().memory_size() / 1024 / 1024,
2515            mem_snap_duration_ms
2516        );
2517        metrics::log_metric_with_details(
2518            metrics::MetricEventType::SnapshotSaveMemoryLatency,
2519            mem_snap_duration_ms as i64,
2520            &metrics_events::RecordDetails {},
2521        );
2522    }
2523    // Snapshot devices
2524    info!("Devices snapshotting...");
2525    device_control_tube
2526        .send(&DeviceControlCommand::SnapshotDevices { snapshot_writer })
2527        .context("send command to devices control socket")?;
2528    let resp: VmResponse = device_control_tube
2529        .recv()
2530        .context("receive from devices control socket")?;
2531    if !matches!(resp, VmResponse::Ok) {
2532        bail!("unexpected SnapshotDevices response: {resp}");
2533    }
2534    info!("Devices snapshotted.");
2535
2536    let snap_duration_ms = snapshot_start.elapsed().as_millis();
2537    info!(
2538        "snapshot: completed snapshot in {}ms; VM mem size: {}MB",
2539        snap_duration_ms,
2540        vm.get_memory().memory_size() / 1024 / 1024,
2541    );
2542    metrics::log_metric_with_details(
2543        metrics::MetricEventType::SnapshotSaveOverallLatency,
2544        snap_duration_ms as i64,
2545        &metrics_events::RecordDetails {},
2546    );
2547    Ok(())
2548}
2549
2550/// Restore the VM to the snapshot at `restore_path`.
2551///
2552/// Same as `VmRequest::execute` with a `VmRequest::Restore`. Exposed as a separate function
2553/// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
2554pub fn do_restore(
2555    restore_path: &Path,
2556    kick_vcpus: impl Fn(VcpuControl),
2557    kick_vcpu: impl Fn(VcpuControl, usize),
2558    irq_handler_control: &Tube,
2559    device_control_tube: &Tube,
2560    vcpu_size: usize,
2561    mut restore_irqchip: impl FnMut(AnySnapshot) -> anyhow::Result<()>,
2562    require_encrypted: bool,
2563    suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2564    vm: &dyn Vm,
2565) -> anyhow::Result<()> {
2566    let restore_start = Instant::now();
2567    let _guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size);
2568    let _devices_guard = DeviceSleepGuard::new(device_control_tube)?;
2569
2570    let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
2571
2572    // Restore hypervisor's paravirtualized clock.
2573    *suspended_pvclock_state = snapshot_reader.read_fragment("pvclock")?;
2574
2575    // Restore IrqChip
2576    let irq_snapshot: AnySnapshot = snapshot_reader.read_fragment("irqchip")?;
2577    restore_irqchip(irq_snapshot)?;
2578
2579    // Restore Vcpu(s)
2580    let vcpu_snapshot_reader = snapshot_reader.namespace("vcpu")?;
2581    let vcpu_snapshot_count = vcpu_snapshot_reader.list_fragments()?.len();
2582    if vcpu_snapshot_count != vcpu_size {
2583        bail!(
2584            "bad cpu count in snapshot: expected={} got={}",
2585            vcpu_size,
2586            vcpu_snapshot_count,
2587        );
2588    }
2589    #[cfg(target_arch = "x86_64")]
2590    let host_tsc_reference_moment = {
2591        // SAFETY: rdtsc takes no arguments.
2592        unsafe { _rdtsc() }
2593    };
2594    let (send_chan, recv_chan) = mpsc::channel();
2595    for vcpu_id in 0..vcpu_size {
2596        kick_vcpu(
2597            VcpuControl::Restore(VcpuRestoreRequest {
2598                result_sender: send_chan.clone(),
2599                snapshot_reader: vcpu_snapshot_reader.clone(),
2600                #[cfg(target_arch = "x86_64")]
2601                host_tsc_reference_moment,
2602            }),
2603            vcpu_id,
2604        );
2605    }
2606    for _ in 0..vcpu_size {
2607        recv_chan
2608            .recv()
2609            .context("Failed to recv restore response")?
2610            .context("Failed to restore vcpu")?;
2611    }
2612
2613    // Restore Memory
2614    {
2615        let mem_restore_start = Instant::now();
2616        let guest_memory_metadata = snapshot_reader.read_fragment("mem_metadata")?;
2617        // SAFETY:
2618        // VM & devices are stopped.
2619        unsafe {
2620            vm.get_memory().restore(
2621                guest_memory_metadata,
2622                &mut snapshot_reader.raw_fragment("mem")?,
2623            )?
2624        };
2625        let mem_restore_duration_ms = mem_restore_start.elapsed().as_millis();
2626        info!(
2627            "snapshot: memory restored {}MB in {}ms",
2628            vm.get_memory().memory_size() / 1024 / 1024,
2629            mem_restore_duration_ms
2630        );
2631        metrics::log_metric_with_details(
2632            metrics::MetricEventType::SnapshotRestoreMemoryLatency,
2633            mem_restore_duration_ms as i64,
2634            &metrics_events::RecordDetails {},
2635        );
2636    }
2637    // Restore devices
2638    device_control_tube
2639        .send(&DeviceControlCommand::RestoreDevices {
2640            snapshot_reader: snapshot_reader.clone(),
2641        })
2642        .context("send restore devices command to devices control socket")?;
2643    let resp: VmResponse = device_control_tube
2644        .recv()
2645        .context("receive from devices control socket")?;
2646    if !matches!(resp, VmResponse::Ok) {
2647        bail!("unexpected RestoreDevices response: {resp}");
2648    }
2649
2650    // refresh the IRQ tokens.
2651    {
2652        irq_handler_control
2653            .send(&IrqHandlerRequest::RefreshIrqEventTokens)
2654            .context("failed to send refresh irq event token command to IRQ handler thread")?;
2655        let resp: IrqHandlerResponse = irq_handler_control
2656            .recv()
2657            .context("failed to recv refresh response from IRQ handler thread")?;
2658        if !matches!(resp, IrqHandlerResponse::IrqEventTokenRefreshComplete) {
2659            bail!(
2660                "received unexpected reply from IRQ handler thread: {:?}",
2661                resp
2662            );
2663        }
2664    }
2665
2666    let restore_duration_ms = restore_start.elapsed().as_millis();
2667    info!(
2668        "snapshot: completed restore in {}ms; mem size: {}",
2669        restore_duration_ms,
2670        vm.get_memory().memory_size(),
2671    );
2672
2673    metrics::log_metric_with_details(
2674        metrics::MetricEventType::SnapshotRestoreOverallLatency,
2675        restore_duration_ms as i64,
2676        &metrics_events::RecordDetails {},
2677    );
2678    Ok(())
2679}
2680
/// Alias for `hypervisor::HypervisorKind`, the type of the `hypervisor` field of
/// `VmResponse::VmDescriptor`.
pub type HypervisorKind = hypervisor::HypervisorKind;
2682
/// Indication of success or failure of a `VmRequest`.
///
/// Success is usually indicated by `VmResponse::Ok`, unless there is data associated with the
/// response, in which case one of the data-carrying variants is returned instead.
#[derive(Serialize, Deserialize, Debug)]
#[must_use]
pub enum VmResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution; carries a `SysError`.
    Err(SysError),
    /// Indicates the request encountered some error during execution; carries a free-form error
    /// message instead of a `SysError`.
    ErrString(String),
    /// The memory was registered into guest address space in memory slot number `slot`.
    RegisterMemory { slot: u32 },
    /// Variant of the register memory but with region_id.
    RegisterMemory2 { region_id: u64 },
    /// Results of balloon control commands.
    #[cfg(feature = "balloon")]
    BalloonStats {
        stats: balloon_control::BalloonStats,
        balloon_actual: u64,
    },
    /// Results of balloon WS-R command
    #[cfg(feature = "balloon")]
    BalloonWS {
        ws: balloon_control::BalloonWS,
        balloon_actual: u64,
    },
    /// Results of PCI hot plug
    #[cfg(feature = "pci-hotplug")]
    PciHotPlugResponse { bus: u8 },
    /// Results of usb control commands.
    UsbResponse(UsbControlResult),
    /// Results of gpu control commands.
    GpuResponse(GpuControlResult),
    /// Results of battery control commands.
    BatResponse(BatControlResult),
    /// Results of swap status command.
    SwapStatus(SwapStatus),
    /// Gets the state of Devices (sleep/wake)
    DevicesState(DevicesState),
    /// Map of the Vcpu PID/TIDs
    // NOTE(review): presumably keyed by vCPU index with a (pid, tid) value — confirm against the
    // code that builds this map.
    VcpuPidTidResponse {
        pid_tid_map: BTreeMap<usize, (u32, u32)>,
    },
    /// Reply to `VmRequest::GetVmDescriptor`: the kind of hypervisor in use and a cloned
    /// descriptor of the VM.
    VmDescriptor {
        hypervisor: HypervisorKind,
        vm_fd: SafeDescriptor,
    },
}
2733
2734impl Display for VmResponse {
2735    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2736        use self::VmResponse::*;
2737
2738        match self {
2739            Ok => write!(f, "ok"),
2740            Err(e) => write!(f, "error: {e}"),
2741            ErrString(e) => write!(f, "error: {e}"),
2742            RegisterMemory { slot } => write!(f, "memory registered in slot {slot}"),
2743            RegisterMemory2 { region_id } => {
2744                write!(f, "memory registered in region id {region_id}")
2745            }
2746            #[cfg(feature = "balloon")]
2747            VmResponse::BalloonStats {
2748                stats,
2749                balloon_actual,
2750            } => {
2751                write!(
2752                    f,
2753                    "stats: {}\nballoon_actual: {}",
2754                    serde_json::to_string_pretty(&stats)
2755                        .unwrap_or_else(|_| "invalid_response".to_string()),
2756                    balloon_actual
2757                )
2758            }
2759            #[cfg(feature = "balloon")]
2760            VmResponse::BalloonWS { ws, balloon_actual } => {
2761                write!(
2762                    f,
2763                    "ws: {}, balloon_actual: {}",
2764                    serde_json::to_string_pretty(&ws)
2765                        .unwrap_or_else(|_| "invalid_response".to_string()),
2766                    balloon_actual,
2767                )
2768            }
2769            UsbResponse(result) => write!(f, "usb control request get result {result:?}"),
2770            #[cfg(feature = "pci-hotplug")]
2771            PciHotPlugResponse { bus } => write!(f, "pci hotplug bus {bus:?}"),
2772            GpuResponse(result) => write!(f, "gpu control request result {result:?}"),
2773            BatResponse(result) => write!(f, "{result}"),
2774            SwapStatus(status) => {
2775                write!(
2776                    f,
2777                    "{}",
2778                    serde_json::to_string(&status)
2779                        .unwrap_or_else(|_| "invalid_response".to_string()),
2780                )
2781            }
2782            DevicesState(status) => write!(f, "devices status: {status:?}"),
2783            VcpuPidTidResponse { pid_tid_map } => write!(f, "vcpu pid tid map: {pid_tid_map:?}"),
2784            VmDescriptor { hypervisor, vm_fd } => {
2785                write!(f, "hypervisor: {hypervisor:?}, vm_fd: {vm_fd:?}")
2786            }
2787        }
2788    }
2789}
2790
/// Enum that allows remote control of a wait context (used between the Windows GpuDisplay & the
/// GPU worker).
#[derive(Serialize, Deserialize)]
pub enum ModifyWaitContext {
    /// Carries a descriptor, serialized through `with_as_descriptor`.
    // NOTE(review): presumably asks the receiver to add this descriptor to its wait context —
    // confirm against the receiving side, which is not visible here.
    Add(#[serde(with = "with_as_descriptor")] Descriptor),
}
2797
/// Errors related to virtio-iommu VFIO commands.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
// NOTE(review): the code producing these errors is not visible in this part of the file.
#[sorted]
#[derive(Error, Debug)]
pub enum VirtioIOMMUVfioError {
    /// The socket operation failed.
    #[error("socket failed")]
    SocketFailed,
    /// A response was received that was not the expected one; the response is attached.
    #[error("unexpected response: {0}")]
    UnexpectedResponse(VirtioIOMMUResponse),
    /// The given command string was not recognized.
    #[error("unknown command: `{0}`")]
    UnknownCommand(String),
    /// Carries a `VirtioIOMMUVfioResult`, whose `Display` text is used as the error message.
    #[error("{0}")]
    VfioControl(VirtioIOMMUVfioResult),
}
2810
/// VFIO-related commands, carried by `VirtioIOMMURequest::VfioCommand`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioCommand {
    /// Add the vfio device attached to virtio-iommu.
    VfioDeviceAdd {
        endpoint_addr: u32,
        wrapper_id: u32,
        // The container file is transferred as a descriptor.
        #[serde(with = "with_as_descriptor")]
        container: File,
    },
    /// Delete the vfio device attached to virtio-iommu.
    VfioDeviceDel {
        endpoint_addr: u32,
    },
    /// Map a dma-buf into the vfio iommu table.
    VfioDmabufMap {
        region_id: VmMemoryRegionId,
        gpa: u64,
        size: u64,
        dma_buf: SafeDescriptor,
    },
    /// Unmap a dma-buf from the vfio iommu table.
    VfioDmabufUnmap(VmMemoryRegionId),
}
2834
/// Outcome of a virtio-iommu VFIO command, carried by `VirtioIOMMUResponse::VfioResponse`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioResult {
    /// The command completed successfully.
    Ok,
    /// Not in the PCI ranges of virtio-iommu.
    NotInPCIRanges,
    /// No vfio container is available.
    NoAvailableContainer,
    /// No such vfio device exists.
    NoSuchDevice,
    /// No such mapped dma-buf exists.
    NoSuchMappedDmabuf,
    /// The parameters were invalid.
    InvalidParam,
}
2844
2845impl Display for VirtioIOMMUVfioResult {
2846    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2847        use self::VirtioIOMMUVfioResult::*;
2848
2849        match self {
2850            Ok => write!(f, "successfully"),
2851            NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
2852            NoAvailableContainer => write!(f, "no available vfio container"),
2853            NoSuchDevice => write!(f, "no such a vfio device"),
2854            NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
2855            InvalidParam => write!(f, "invalid parameters"),
2856        }
2857    }
2858}
2859
/// A request to the virtio-iommu process to perform some operations.
///
/// Unless otherwise noted, the sender of a request should expect to receive a
/// `VirtioIOMMUResponse::Ok` on success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMURequest {
    /// Command for vfio related operations.
    VfioCommand(VirtioIOMMUVfioCommand),
}
2869
/// Indication of success or failure of a `VirtioIOMMURequest`.
///
/// Success is usually indicated by `VirtioIOMMUResponse::Ok`, unless there is data associated
/// with the response.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution.
    Err(SysError),
    /// Results for Vfio commands.
    VfioResponse(VirtioIOMMUVfioResult),
}
2883
2884impl Display for VirtioIOMMUResponse {
2885    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2886        use self::VirtioIOMMUResponse::*;
2887        match self {
2888            Ok => write!(f, "ok"),
2889            Err(e) => write!(f, "error: {e}"),
2890            VfioResponse(result) => write!(
2891                f,
2892                "The vfio-related virtio-iommu request got result: {result:?}"
2893            ),
2894        }
2895    }
2896}
2897
2898/// Send VirtioIOMMURequest without waiting for the response
2899pub fn virtio_iommu_request_async(
2900    iommu_control_tube: &Tube,
2901    req: &VirtioIOMMURequest,
2902) -> VirtioIOMMUResponse {
2903    match iommu_control_tube.send(&req) {
2904        Ok(_) => VirtioIOMMUResponse::Ok,
2905        Err(e) => {
2906            error!("virtio-iommu socket send failed: {:?}", e);
2907            VirtioIOMMUResponse::Err(SysError::last())
2908        }
2909    }
2910}
2911
/// Return type of `virtio_iommu_request`.
///
/// The error type carries no information; failures are reported through
/// `VirtioIOMMUResponse::Err` inside the `Ok` value instead.
pub type VirtioIOMMURequestResult = std::result::Result<VirtioIOMMUResponse, ()>;
2913
2914/// Send VirtioIOMMURequest and wait to get the response
2915pub fn virtio_iommu_request(
2916    iommu_control_tube: &Tube,
2917    req: &VirtioIOMMURequest,
2918) -> VirtioIOMMURequestResult {
2919    let response = match virtio_iommu_request_async(iommu_control_tube, req) {
2920        VirtioIOMMUResponse::Ok => match iommu_control_tube.recv() {
2921            Ok(response) => response,
2922            Err(e) => {
2923                error!("virtio-iommu socket recv failed: {:?}", e);
2924                VirtioIOMMUResponse::Err(SysError::last())
2925            }
2926        },
2927        resp => resp,
2928    };
2929    Ok(response)
2930}
2931
#[cfg(test)]
mod tests {
    use anyhow::anyhow;

    use super::*;

    /// Collects the message of every entry in the error's chain, in iteration order.
    fn chain_messages(error: &VmMemoryResponseError) -> Vec<String> {
        error.0.chain().map(|cause| cause.to_string()).collect()
    }

    /// A JSON round trip must preserve both the top-level message and the whole context chain.
    #[test]
    fn vm_memory_response_error_should_serialize_and_deserialize_correctly() {
        let original: VmMemoryResponseError = anyhow!("root cause")
            .context("context 1")
            .context("context 2")
            .into();

        let json = serde_json::to_vec(&original).expect("should serialize to json successfully");
        let round_tripped: VmMemoryResponseError =
            serde_json::from_slice(&json).expect("should deserialize from json successfully");

        assert_eq!(original.0.to_string(), round_tripped.0.to_string());
        assert_eq!(chain_messages(&original), chain_messages(&round_tripped));
    }

    /// A flattened error with no messages at all must be rejected on deserialization.
    #[test]
    fn vm_memory_response_error_deserialization_should_handle_malformat_correctly() {
        let empty_chain = FlatVmMemoryResponseError(vec![]);
        let json =
            serde_json::to_vec(&empty_chain).expect("should serialize to json successfully");

        let outcome = serde_json::from_slice::<VmMemoryResponseError>(&json);
        outcome.expect_err("deserialize with 0 error messages should fail");
    }
}