devices/virtio/
virtio_pci_device.rs

1// Copyright 2018 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::collections::BTreeMap;
6use std::sync::Arc;
7
8#[cfg(target_arch = "x86_64")]
9use acpi_tables::sdt::SDT;
10use anyhow::anyhow;
11use anyhow::Context;
12use base::debug;
13use base::error;
14use base::trace;
15use base::AsRawDescriptor;
16use base::AsRawDescriptors;
17use base::Event;
18use base::Protection;
19use base::RawDescriptor;
20use base::Result;
21use base::SharedMemory;
22use base::Tube;
23use base::WorkerThread;
24use data_model::Le32;
25use hypervisor::Datamatch;
26use hypervisor::MemCacheType;
27use libc::ERANGE;
28#[cfg(target_arch = "x86_64")]
29use metrics::MetricEventType;
30use resources::AddressRange;
31use resources::Alloc;
32use resources::AllocOptions;
33use resources::SystemAllocator;
34use serde::Deserialize;
35use serde::Serialize;
36use snapshot::AnySnapshot;
37use sync::Mutex;
38use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
39use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
40use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
41use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
42use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
43use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
44use virtio_sys::virtio_config::VIRTIO_CONFIG_S_SUSPEND;
45use vm_control::api::VmMemoryClient;
46use vm_control::VmMemoryDestination;
47use vm_control::VmMemoryRegionId;
48use vm_control::VmMemorySource;
49use vm_memory::GuestMemory;
50use zerocopy::FromBytes;
51use zerocopy::Immutable;
52use zerocopy::IntoBytes;
53use zerocopy::KnownLayout;
54
55use self::virtio_pci_common_config::VirtioPciCommonConfig;
56use super::*;
57#[cfg(target_arch = "x86_64")]
58use crate::acpi::PmWakeupEvent;
59#[cfg(target_arch = "x86_64")]
60use crate::pci::pm::PciDevicePower;
61use crate::pci::pm::PciPmCap;
62use crate::pci::pm::PmConfig;
63use crate::pci::pm::PmStatusChange;
64use crate::pci::BarRange;
65use crate::pci::MsixCap;
66use crate::pci::MsixConfig;
67use crate::pci::MsixStatus;
68use crate::pci::PciAddress;
69use crate::pci::PciBarConfiguration;
70use crate::pci::PciBarIndex;
71use crate::pci::PciBarPrefetchable;
72use crate::pci::PciBarRegionType;
73use crate::pci::PciBaseSystemPeripheralSubclass;
74use crate::pci::PciCapability;
75use crate::pci::PciCapabilityID;
76use crate::pci::PciClassCode;
77use crate::pci::PciConfiguration;
78use crate::pci::PciDevice;
79use crate::pci::PciDeviceError;
80use crate::pci::PciDisplaySubclass;
81use crate::pci::PciHeaderType;
82use crate::pci::PciId;
83use crate::pci::PciInputDeviceSubclass;
84use crate::pci::PciInterruptPin;
85use crate::pci::PciMassStorageSubclass;
86use crate::pci::PciMultimediaSubclass;
87use crate::pci::PciNetworkControllerSubclass;
88use crate::pci::PciSimpleCommunicationControllerSubclass;
89use crate::pci::PciSubclass;
90use crate::pci::PciWirelessControllerSubclass;
91use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
92#[cfg(feature = "pci-hotplug")]
93use crate::HotPluggable;
94use crate::IrqLevelEvent;
95use crate::Suspendable;
96
/// Values of the `cfg_type` field of a virtio PCI capability, identifying
/// which configuration structure the capability points at.
#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    // Doorbell, Notification and SharedMemory are Virtio Vhost User related PCI
    // capabilities. Specified in 5.7.7.4 here
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}
112
/// In-memory layout of a virtio PCI vendor-specific capability
/// (`struct virtio_pci_cap`). `#[repr(C)]` so the struct bytes can be exposed
/// directly in PCI configuration space via `IntoBytes`.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8, // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8, // Generic PCI field: next ptr
    pub cap_len: u8,  // Generic PCI field: capability length
    pub cfg_type: u8, // Identifies the structure.
    pub bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    pub offset: Le32, // Offset within bar.
    pub length: Le32, // Length of the structure, in bytes.
}
128
129impl PciCapability for VirtioPciCap {
130    fn bytes(&self) -> &[u8] {
131        self.as_bytes()
132    }
133
134    fn id(&self) -> PciCapabilityID {
135        PciCapabilityID::VendorSpecific
136    }
137
138    fn writable_bits(&self) -> Vec<u32> {
139        vec![0u32; 4]
140    }
141}
142
143impl VirtioPciCap {
144    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
145        VirtioPciCap {
146            cap_vndr: 0,
147            cap_next: 0,
148            cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
149            cfg_type: cfg_type as u8,
150            bar,
151            id: 0,
152            padding: [0; 2],
153            offset: Le32::from(offset),
154            length: Le32::from(length),
155        }
156    }
157
158    pub fn set_cap_len(&mut self, cap_len: u8) {
159        self.cap_len = cap_len;
160    }
161}
162
/// Notify capability (`struct virtio_pci_notify_cap`): the base capability
/// extended with the multiplier used to compute per-queue notify addresses.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    // Queue N notifies at `cap.offset + queue_notify_off * notify_off_multiplier`.
    notify_off_multiplier: Le32,
}
170
171impl PciCapability for VirtioPciNotifyCap {
172    fn bytes(&self) -> &[u8] {
173        self.as_bytes()
174    }
175
176    fn id(&self) -> PciCapabilityID {
177        PciCapabilityID::VendorSpecific
178    }
179
180    fn writable_bits(&self) -> Vec<u32> {
181        vec![0u32; 5]
182    }
183}
184
185impl VirtioPciNotifyCap {
186    pub fn new(
187        cfg_type: PciCapabilityType,
188        bar: u8,
189        offset: u32,
190        length: u32,
191        multiplier: Le32,
192    ) -> Self {
193        VirtioPciNotifyCap {
194            cap: VirtioPciCap {
195                cap_vndr: 0,
196                cap_next: 0,
197                cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
198                cfg_type: cfg_type as u8,
199                bar,
200                id: 0,
201                padding: [0; 2],
202                offset: Le32::from(offset),
203                length: Le32::from(length),
204            },
205            notify_off_multiplier: multiplier,
206        }
207    }
208}
209
/// Shared-memory capability (`struct virtio_pci_cap64`): the base capability
/// holds the low 32 bits of offset/length, this extension holds the high bits.
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most sig 32 bits of offset
    length_hi: Le32, // Most sig 32 bits of length
}
217
218impl PciCapability for VirtioPciShmCap {
219    fn bytes(&self) -> &[u8] {
220        self.as_bytes()
221    }
222
223    fn id(&self) -> PciCapabilityID {
224        PciCapabilityID::VendorSpecific
225    }
226
227    fn writable_bits(&self) -> Vec<u32> {
228        vec![0u32; 6]
229    }
230}
231
232impl VirtioPciShmCap {
233    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
234        VirtioPciShmCap {
235            cap: VirtioPciCap {
236                cap_vndr: 0,
237                cap_next: 0,
238                cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
239                cfg_type: cfg_type as u8,
240                bar,
241                id: shmid,
242                padding: [0; 2],
243                offset: Le32::from(offset as u32),
244                length: Le32::from(length as u32),
245            },
246            offset_hi: Le32::from((offset >> 32) as u32),
247            length_hi: Le32::from((length >> 32) as u32),
248        }
249    }
250}
251
// Allocate one bar for the structs pointed to by the capability structures.
// Each region below lives at a fixed offset within the settings BAR; the
// `*_LAST` constants are the inclusive end offsets used for range matching in
// `read_bar`/`write_bar`.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
// Total size of the settings BAR covering all of the regions above.
const CAPABILITY_BAR_SIZE: u64 = 0x8000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

// PCI vendor ID assigned to virtio devices.
const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

// BAR 0 holds the settings regions above; BAR 2 holds shared memory, if any.
const CAPABILITIES_BAR_NUM: usize = 0;
const SHMEM_BAR_NUM: usize = 2;
281
/// A queue notify event plus a flag tracking whether it is currently
/// registered as an ioevent with the hypervisor.
struct QueueEvent {
    event: Event,
    // Set when the event is registered via `ioevent_vm_memory_client`;
    // cleared again by `unregister_ioevents`.
    ioevent_registered: bool,
}
286
/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    // PCI configuration space registers, including capabilities and BARs.
    config_regs: PciConfiguration,
    // Address requested by the device itself, if any; honored in
    // `allocate_address`.
    preferred_address: Option<PciAddress>,
    // Address actually assigned on the PCI bus; None until allocated.
    pci_address: Option<PciAddress>,

    // The virtio device being exposed over this transport.
    device: Box<dyn VirtioDevice>,
    device_activated: bool,
    // If set, no INTx interrupt pin is advertised in config space.
    disable_intx: bool,

    // Created on activation; None until then.
    interrupt: Option<Interrupt>,
    interrupt_evt: Option<IrqLevelEvent>,
    interrupt_resample_worker: Option<WorkerThread<()>>,

    // One QueueConfig per queue, parallel to `queue_evts`.
    queues: Vec<QueueConfig>,
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    // Index of the BAR holding the common/ISR/device/notify/MSI-X regions.
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    vm_control_tube: Arc<Mutex<Tube>>,
}
325
/// State held while the device is asleep (see `sleep_state` on
/// `VirtioPciDevice`).
enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}
336
/// Serialized form of a `VirtioPciDevice` used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    config_regs: AnySnapshot,

    // Snapshot of the wrapped `VirtioDevice`.
    inner_device: AnySnapshot,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: AnySnapshot,
    common_config: VirtioPciCommonConfig,

    // Per-queue `QueueConfig` snapshots, in queue-index order.
    queues: Vec<AnySnapshot>,
    // Present only if the device was activated: (queue index, Queue) pairs.
    activated_queues: Option<Vec<(usize, AnySnapshot)>>,
}
351
impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    ///
    /// `msi_device_tube` is used by the MSI-X config to deliver interrupts;
    /// `ioevent_vm_memory_client` registers queue notify ioevents with the
    /// hypervisor; `shared_memory_vm_memory_client` must be `Some` iff the
    /// device exposes a shared memory region.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        // One notify Event per queue; none are registered as ioevents yet
        // (registration happens lazily in `activate`).
        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        // Modern virtio PCI device IDs are the virtio device type plus a fixed base.
        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        // Advertise the PCI class/subclass that best matches the virtio device type.
        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Console => (
                PciClassCode::SimpleCommunicationController,
                &PciSimpleCommunicationControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Rng => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Balloon => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Scsi => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Scsi as &dyn PciSubclass,
            ),
            DeviceType::P9 => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Input => (
                PciClassCode::InputDevice,
                &PciInputDeviceSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Vsock => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Iommu => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Iommu as &dyn PciSubclass,
            ),
            DeviceType::Sound => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::AudioController as &dyn PciSubclass,
            ),
            DeviceType::Fs => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pmem => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::NonVolatileMemory as &dyn PciSubclass,
            ),
            DeviceType::Mac80211HwSim => (
                PciClassCode::WirelessController,
                &PciWirelessControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::VideoEncoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::VideoDecoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Media => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Scmi => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Wl => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Tpm => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pvclock => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per device interrupt plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            interrupt_resample_worker: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                // No MSI-X vector assigned for config changes until the driver
                // selects one.
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }

    /// Returns true once the driver has set ACKNOWLEDGE, DRIVER, FEATURES_OK
    /// and DRIVER_OK in the status register without setting FAILED.
    fn is_driver_ready(&self) -> bool {
        let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
            | VIRTIO_CONFIG_S_DRIVER
            | VIRTIO_CONFIG_S_DRIVER_OK
            | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
        (self.common_config.driver_status & ready_bits) == ready_bits
            && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
    }

    /// Returns true if the driver has set the SUSPEND bit in the status
    /// register.
    fn is_device_suspended(&self) -> bool {
        (self.common_config.driver_status & VIRTIO_CONFIG_S_SUSPEND as u8) != 0
    }

    /// Determines if the driver has requested the device reset itself
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }

    /// Registers the virtio transport capabilities (common config, ISR, device
    /// config, notify, MSI-X and power management) pointing into
    /// `settings_bar`, and records that BAR index as the settings BAR.
    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        //TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // MSI-X table and PBA both live in the settings BAR at fixed offsets.
        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }

    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    fn activate(&mut self) -> anyhow::Result<()> {
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some((
                PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());
        self.interrupt_resample_worker = interrupt.spawn_resample_thread();

        // Guest notify addresses are offsets from the notification region of
        // the settings BAR, NOTIFY_OFF_MULTIPLIER bytes apart per queue.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                // Register the queue's notify ioevent if not already done
                // (e.g. on re-activation after a BAR reprogram).
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt, interrupt.clone())
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        // A device activation failure is reported to the guest via NEEDS_RESET
        // rather than propagated as an error.
        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }

    /// Unregisters every queue notify ioevent previously registered with the
    /// hypervisor, clearing each queue's `ioevent_registered` flag.
    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }

    /// Returns a reference to the wrapped virtio device.
    pub fn virtio_device(&self) -> &dyn VirtioDevice {
        self.device.as_ref()
    }

    /// Returns the PCI address assigned to this device, if one has been
    /// allocated.
    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }

    /// Arms wakeup-event reporting when the device enters D3 and disarms it
    /// otherwise.
    #[cfg(target_arch = "x86_64")]
    fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
        if let Some(interrupt) = self.interrupt.as_mut() {
            interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
        }
    }

    /// PM wakeup handling is only implemented on x86_64; no-op elsewhere.
    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
}
719
720impl PciDevice for VirtioPciDevice {
721    fn debug_label(&self) -> String {
722        format!("pci{}", self.device.debug_label())
723    }
724
725    fn preferred_address(&self) -> Option<PciAddress> {
726        self.preferred_address
727    }
728
729    fn allocate_address(
730        &mut self,
731        resources: &mut SystemAllocator,
732    ) -> std::result::Result<PciAddress, PciDeviceError> {
733        if self.pci_address.is_none() {
734            if let Some(address) = self.preferred_address {
735                if !resources.reserve_pci(address, self.debug_label()) {
736                    return Err(PciDeviceError::PciAllocationFailed);
737                }
738                self.pci_address = Some(address);
739            } else {
740                self.pci_address = resources.allocate_pci(0, self.debug_label());
741            }
742            self.msix_config
743                .lock()
744                .set_pci_address(self.pci_address.unwrap());
745        }
746        self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
747    }
748
749    fn keep_rds(&self) -> Vec<RawDescriptor> {
750        let mut rds = self.device.keep_rds();
751        rds.extend(
752            self.queue_evts
753                .iter()
754                .map(|qe| qe.event.as_raw_descriptor()),
755        );
756        if let Some(interrupt_evt) = &self.interrupt_evt {
757            rds.extend(interrupt_evt.as_raw_descriptors());
758        }
759        let descriptor = self.msix_config.lock().get_msi_socket();
760        rds.push(descriptor);
761        if let Some(iommu) = &self.iommu {
762            rds.append(&mut iommu.lock().as_raw_descriptors());
763        }
764        rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
765        rds.push(self.vm_control_tube.lock().as_raw_descriptor());
766        rds
767    }
768
769    fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
770        self.interrupt_evt = Some(irq_evt);
771        if !self.disable_intx {
772            self.config_regs.set_irq(irq_num as u8, pin);
773        }
774    }
775
776    fn allocate_io_bars(
777        &mut self,
778        resources: &mut SystemAllocator,
779    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
780        let device_type = self.device.device_type();
781        allocate_io_bars(
782            self,
783            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
784                resources
785                    .allocate_mmio(
786                        size,
787                        alloc,
788                        format!("virtio-{device_type}-cap_bar"),
789                        alloc_option,
790                    )
791                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
792            },
793        )
794    }
795
796    fn allocate_device_bars(
797        &mut self,
798        resources: &mut SystemAllocator,
799    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
800        let device_type = self.device.device_type();
801        allocate_device_bars(
802            self,
803            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
804                resources
805                    .allocate_mmio(
806                        size,
807                        alloc,
808                        format!("virtio-{device_type}-custom_bar"),
809                        alloc_option,
810                    )
811                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
812            },
813        )
814    }
815
816    fn destroy_device(&mut self) {
817        if let Err(e) = self.unregister_ioevents() {
818            error!("error destroying {}: {:?}", &self.debug_label(), &e);
819        }
820    }
821
822    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
823        self.config_regs.get_bar_configuration(bar_num)
824    }
825
826    fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
827        let mut caps = self.device.get_device_caps();
828        if let Some(region) = self.device.get_shared_memory_region() {
829            caps.push(Box::new(VirtioPciShmCap::new(
830                PciCapabilityType::SharedMemoryConfig,
831                SHMEM_BAR_NUM as u8,
832                0,
833                region.length,
834                region.id,
835            )));
836        }
837
838        for cap in caps {
839            self.config_regs
840                .add_capability(&*cap, None)
841                .map_err(PciDeviceError::CapabilitiesSetup)?;
842        }
843
844        Ok(())
845    }
846
847    fn read_config_register(&self, reg_idx: usize) -> u32 {
848        self.config_regs.read_reg(reg_idx)
849    }
850
851    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
852        if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
853            if let Some(msix_behavior) = res.downcast_ref::<MsixStatus>() {
854                self.device.control_notify(*msix_behavior);
855            } else if let Some(status) = res.downcast_ref::<PmStatusChange>() {
856                self.handle_pm_status_change(status);
857            }
858        }
859    }
860
861    fn setup_pci_config_mapping(
862        &mut self,
863        shmem: &SharedMemory,
864        base: usize,
865        len: usize,
866    ) -> std::result::Result<bool, PciDeviceError> {
867        self.config_regs
868            .setup_mapping(shmem, base, len)
869            .map(|_| true)
870            .map_err(PciDeviceError::MmioSetup)
871    }
872
    /// Handles a guest read from one of the device's BARs.
    ///
    /// Only the settings BAR is decoded here; reads targeting other BARs
    /// (e.g. a shared memory BAR) do not go through this path.
    fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
        if bar_index == self.settings_bar {
            match offset {
                // Virtio common configuration (features, queue registers,
                // device status, ...).
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                // Interrupt status register; reads only the first byte of the
                // requested span, other bytes of `data` are left untouched.
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.get_mut(0) {
                        // Reading this register resets it to 0.
                        *v = if let Some(interrupt) = &self.interrupt {
                            interrupt.read_and_reset_interrupt_status()
                        } else {
                            0
                        };
                    }
                }
                // Device-specific configuration space.
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Handled with ioevents.
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    self.msix_config
                        .lock()
                        .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                // Reads outside the defined regions are ignored; `data` is
                // left as-is.
                _ => (),
            }
        }
    }
913
    /// Handles a guest write to one of the device's BARs, then applies any
    /// state transitions implied by the write: device activation, suspend
    /// tracking, and driver-initiated reset.
    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        // Capture the suspend state before the write so a change caused by
        // this write can be propagated to the interrupt afterwards.
        let was_suspended = self.is_device_suspended();

        if bar_index == self.settings_bar {
            match offset {
                // Virtio common configuration (features, queue registers,
                // device status, ...).
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                // Writing the ISR clears the interrupt status bits given in
                // the first byte written.
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                // Device-specific configuration space.
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    // An MSI-X table write may change masking state; notify
                    // the device of the resulting behavior change.
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                // Writes outside the defined regions are ignored.
                _ => (),
            }
        }

        // The write above may have completed driver initialization; activate
        // the device once the driver signals readiness.
        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        // Propagate a suspend-state transition caused by this write.
        let is_suspended = self.is_device_suspended();
        if is_suspended != was_suspended {
            if let Some(interrupt) = self.interrupt.as_mut() {
                interrupt.set_suspended(is_suspended);
            }
        }

        // Device has been reset by the driver
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                self.device_activated = false;
                // reset queues
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // select queue 0 by default
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
                if let Some(interrupt_resample_worker) = self.interrupt_resample_worker.take() {
                    interrupt_resample_worker.stop();
                }
            }
        }
    }
998
    /// Notifies the inner virtio device that sandboxing has been applied.
    fn on_device_sandboxed(&mut self) {
        self.device.on_device_sandboxed();
    }
1002
    /// Delegates ACPI table generation to the inner virtio device, passing the
    /// device's assigned PCI address.
    ///
    /// Panics if called before a PCI address has been assigned.
    #[cfg(target_arch = "x86_64")]
    fn generate_acpi(&mut self, sdts: &mut Vec<SDT>) -> anyhow::Result<()> {
        self.device.generate_acpi(
            self.pci_address.expect("pci_address must be assigned"),
            sdts,
        )
    }
1010
    /// Downcast hook: this device is a `VirtioPciDevice`, so return `self`.
    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        Some(self)
    }
1014}
1015
/// Allocates the single 32-bit "settings" BAR holding the virtio common
/// config, ISR, device config, notification, and MSI-X structures, then
/// registers the matching PCI capabilities.
///
/// `alloc_fn` performs the actual address allocation, so this can be backed by
/// either the real system allocator or a placeholder allocator (hotplug
/// layout).
fn allocate_io_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_io_bars");
    // Allocate one bar for the structures pointed to by the capability structures.
    let settings_config_addr = alloc_fn(
        CAPABILITY_BAR_SIZE,
        Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: 0,
        },
        // A 32-bit memory BAR must live below 4 GiB.
        AllocOptions::new()
            .max_address(u32::MAX.into())
            .align(CAPABILITY_BAR_SIZE),
    )?;
    let config = PciBarConfiguration::new(
        CAPABILITIES_BAR_NUM,
        CAPABILITY_BAR_SIZE,
        PciBarRegionType::Memory32BitRegion,
        PciBarPrefetchable::NotPrefetchable,
    )
    .set_address(settings_config_addr);
    let settings_bar = virtio_pci_device
        .config_regs
        .add_pci_bar(config)
        .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
        as u8;
    // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
    virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;

    Ok(vec![BarRange {
        addr: settings_config_addr,
        size: CAPABILITY_BAR_SIZE,
        prefetchable: false,
    }])
}
1060
1061fn allocate_device_bars<F>(
1062    virtio_pci_device: &mut VirtioPciDevice,
1063    mut alloc_fn: F,
1064) -> std::result::Result<Vec<BarRange>, PciDeviceError>
1065where
1066    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
1067{
1068    let address = virtio_pci_device
1069        .pci_address
1070        .expect("allocate_address must be called prior to allocate_device_bars");
1071
1072    let configs = virtio_pci_device.device.get_device_bars(address);
1073    let configs = if !configs.is_empty() {
1074        configs
1075    } else {
1076        let region = match virtio_pci_device.device.get_shared_memory_region() {
1077            None => return Ok(Vec::new()),
1078            Some(r) => r,
1079        };
1080        let config = PciBarConfiguration::new(
1081            SHMEM_BAR_NUM,
1082            region
1083                .length
1084                .checked_next_power_of_two()
1085                .expect("bar too large"),
1086            PciBarRegionType::Memory64BitRegion,
1087            PciBarPrefetchable::Prefetchable,
1088        );
1089
1090        let alloc = Alloc::PciBar {
1091            bus: address.bus,
1092            dev: address.dev,
1093            func: address.func,
1094            bar: config.bar_index() as u8,
1095        };
1096
1097        let vm_memory_client = virtio_pci_device
1098            .shared_memory_vm_memory_client
1099            .take()
1100            .expect("missing shared_memory_tube");
1101
1102        // See comment VmMemoryRequest::execute
1103        let can_prepare = !virtio_pci_device
1104            .device
1105            .expose_shmem_descriptors_with_viommu();
1106        let prepare_type = if can_prepare {
1107            virtio_pci_device.device.get_shared_memory_prepare_type()
1108        } else {
1109            SharedMemoryPrepareType::DynamicPerMapping
1110        };
1111
1112        let vm_requester = Box::new(VmRequester::new(vm_memory_client, alloc, prepare_type));
1113        virtio_pci_device
1114            .device
1115            .set_shared_memory_mapper(vm_requester);
1116
1117        vec![config]
1118    };
1119    let mut ranges = vec![];
1120    for config in configs {
1121        let device_addr = alloc_fn(
1122            config.size(),
1123            Alloc::PciBar {
1124                bus: address.bus,
1125                dev: address.dev,
1126                func: address.func,
1127                bar: config.bar_index() as u8,
1128            },
1129            AllocOptions::new()
1130                .prefetchable(config.is_prefetchable())
1131                .align(config.size()),
1132        )?;
1133        let config = config.set_address(device_addr);
1134        let _device_bar = virtio_pci_device
1135            .config_regs
1136            .add_pci_bar(config)
1137            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
1138        ranges.push(BarRange {
1139            addr: device_addr,
1140            size: config.size(),
1141            prefetchable: false,
1142        });
1143    }
1144
1145    if virtio_pci_device
1146        .device
1147        .get_shared_memory_region()
1148        .is_some()
1149    {
1150        let shmem_region = AddressRange::from_start_and_size(ranges[0].addr, ranges[0].size)
1151            .expect("invalid shmem region");
1152        virtio_pci_device
1153            .device
1154            .set_shared_memory_region(shmem_region);
1155    }
1156
1157    Ok(ranges)
1158}
1159
#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Assigns `pci_addr` to this device and propagates it to the MSI-X
    /// configuration.
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        self.msix_config.lock().set_pci_address(pci_addr);
        self.pci_address = Some(pci_addr);
        Ok(())
    }

    /// Lays out the IO (settings) BAR using placeholder addresses; no real
    /// memory is allocated.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut layout = SimpleAllocator::new(0);
        allocate_io_bars(self, |size, _alloc, _opts| layout.alloc(size, size))?;
        Ok(())
    }

    /// Lays out device BARs with placeholder addresses, skipping the space
    /// reserved for the settings BAR (CAPABILITY_BAR_SIZE).
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut layout = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        allocate_device_bars(self, |size, _alloc, _opts| layout.alloc(size, size))?;
        Ok(())
    }
}
1184
#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the layout of PCI BAR for hotplugged
/// devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by guest OS.
struct SimpleAllocator {
    // Next candidate address; aligned upward on each alloc() call.
    current_address: u64,
}
1194
#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Creates an allocator whose first interval starts at or after
    /// `base_address`.
    fn new(base_address: u64) -> Self {
        Self {
            current_address: base_address,
        }
    }

    /// Reserves `size` bytes aligned upward to `align` (no alignment when
    /// `align` is 0) and returns the interval's start address.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        let start_address = match align {
            0 => self.current_address,
            a => self.current_address.next_multiple_of(a),
        };
        self.current_address = start_address + size;
        Ok(start_address)
    }
}
1215
impl Suspendable for VirtioPciDevice {
    /// Puts the device to sleep, capturing the activated queues (if any) in
    /// `sleep_state` so they can be handed back on `wake` or serialized by
    /// `snapshot`. No-op if already asleep.
    fn sleep(&mut self) -> anyhow::Result<()> {
        // If the device is already asleep, we should not request it to sleep again.
        if self.sleep_state.is_some() {
            return Ok(());
        }

        if let Some(queues) = self.device.virtio_sleep()? {
            // Returning queues implies the device was activated; anything
            // else means our state and the device's are out of sync.
            anyhow::ensure!(
                self.device_activated,
                format!(
                    "unactivated device {} returned queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Active {
                activated_queues: queues,
            });
        } else {
            anyhow::ensure!(
                !self.device_activated,
                format!(
                    "activated device {} didn't return queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Inactive);
        }
        Ok(())
    }

    /// Wakes a previously slept device, handing back the queues captured by
    /// `sleep` when the device was active. No-op if already awake.
    fn wake(&mut self) -> anyhow::Result<()> {
        match self.sleep_state.take() {
            None => {
                // If the device is already awake, we should not request it to wake again.
            }
            Some(SleepState::Inactive) => {
                self.device.virtio_wake(None).with_context(|| {
                    format!(
                        "virtio_wake failed for {}, can't recover",
                        self.debug_label(),
                    )
                })?;
            }
            Some(SleepState::Active { activated_queues }) => {
                // An active device must have an interrupt; restore memory,
                // interrupt, and the captured queues together.
                self.device
                    .virtio_wake(Some((
                        self.mem.clone(),
                        self.interrupt
                            .clone()
                            .expect("interrupt missing for already active queues"),
                        activated_queues,
                    )))
                    .with_context(|| {
                        format!(
                            "virtio_wake failed for {}, can't recover",
                            self.debug_label(),
                        )
                    })?;
            }
        };
        Ok(())
    }

    /// Serializes the PCI and virtio state of the device. Must be called
    /// while asleep (after `sleep`); bails if called while awake or if an
    /// IOMMU is attached.
    fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        AnySnapshot::to_any(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            // Queues captured by sleep() are serialized alongside their
            // indices so restore() can re-associate them.
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }

    /// Restores device state from a snapshot produced by `snapshot`. Must be
    /// called while the device is asleep and inactive; restore order matters
    /// (MSI-X before interrupt, interrupt before queues).
    fn restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = AnySnapshot::from_any(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        // Restore the interrupt. This must be done after restoring the MSI-X configuration, but
        // before restoring the queues.
        if let Some(deser_interrupt) = deser.interrupt {
            let interrupt = Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some((
                    PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            );
            self.interrupt_resample_worker = interrupt.spawn_resample_thread();
            self.interrupt = Some(interrupt);
        }

        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            let interrupt = self
                .interrupt
                .as_ref()
                .context("tried to restore active queues without an interrupt")?;
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(
                        queue_config,
                        queue_snapshot,
                        &self.mem,
                        queue_evt,
                        interrupt.clone(),
                    )?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep.)
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        self.device.virtio_restore(deser.inner_device)?;

        Ok(())
    }
}
1453
/// Proxy that forwards a virtio device's shared-memory mapping requests to the
/// VM via a `VmMemoryClient`, placing mappings inside the device's shmem BAR
/// allocation.
struct VmRequester {
    vm_memory_client: VmMemoryClient,
    // The PCI BAR allocation that all mappings are placed within.
    alloc: Alloc,
    // BAR offset -> registered region id, for later unmapping.
    mappings: BTreeMap<u64, VmMemoryRegionId>,
    prepare_type: SharedMemoryPrepareType,
    // Whether the one-time region preparation has already been performed.
    prepared: bool,
}
1461
impl VmRequester {
    /// Creates a `VmRequester` for the given BAR allocation. Region
    /// preparation is deferred until the first mapping request.
    fn new(
        vm_memory_client: VmMemoryClient,
        alloc: Alloc,
        prepare_type: SharedMemoryPrepareType,
    ) -> Self {
        Self {
            vm_memory_client,
            alloc,
            mappings: BTreeMap::new(),
            prepare_type,
            prepared: false,
        }
    }
}
1477
impl SharedMemoryMapper for VmRequester {
    /// Registers `source` at `offset` within the device's shmem BAR
    /// allocation, lazily preparing the shared memory region on the first
    /// call when a single up-front mapping was requested.
    fn add_mapping(
        &mut self,
        source: VmMemorySource,
        offset: u64,
        prot: Protection,
        cache: MemCacheType,
    ) -> anyhow::Result<()> {
        if !self.prepared {
            // One-time preparation, only for the SingleMappingOnFirst mode;
            // DynamicPerMapping devices skip straight to register_memory.
            if let SharedMemoryPrepareType::SingleMappingOnFirst(prepare_cache_type) =
                self.prepare_type
            {
                debug!(
                    "lazy prepare_shared_memory_region with {:?}",
                    prepare_cache_type
                );
                self.vm_memory_client
                    .prepare_shared_memory_region(self.alloc, prepare_cache_type)
                    .context("lazy prepare_shared_memory_region failed")?;
            }
            self.prepared = true;
        }

        // devices must implement VirtioDevice::get_shared_memory_prepare_type(), returning
        // SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheNonCoherent) in order to
        // add any mapping that requests MemCacheType::CacheNonCoherent.
        if cache == MemCacheType::CacheNonCoherent {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent) =
                self.prepare_type
            {
                error!("invalid request to map with CacheNonCoherent for device with prepared CacheCoherent memory");
                return Err(anyhow!("invalid MemCacheType"));
            }
        }

        let id = self
            .vm_memory_client
            .register_memory(
                source,
                VmMemoryDestination::ExistingAllocation {
                    allocation: self.alloc,
                    offset,
                },
                prot,
                cache,
            )
            .context("register_memory failed")?;

        // Remember the region id so remove_mapping() can unregister it.
        self.mappings.insert(offset, id);
        Ok(())
    }

    /// Removes the mapping previously added at `offset`; errors if no mapping
    /// exists there.
    fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
        let id = self.mappings.remove(&offset).context("invalid offset")?;
        self.vm_memory_client
            .unregister_memory(id)
            .context("unregister_memory failed")
    }

    fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
        Some(self.vm_memory_client.as_raw_descriptor())
    }
}
1541
#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        let mut simple_allocator = super::SimpleAllocator::new(0);
        // current_address 0 is already aligned to 0x80 => start at 0; interval ends at 0x20.
        assert_eq!(simple_allocator.alloc(0x20, 0x80).unwrap(), 0);
        // current_address 0x20 aligns up to 0x40 => start at 0x40; interval ends at 0x80.
        assert_eq!(simple_allocator.alloc(0x40, 0x40).unwrap(), 0x40);
        // current_address 0x80 is already aligned => start at 0x80; interval ends at 0x108.
        assert_eq!(simple_allocator.alloc(0x88, 0x80).unwrap(), 0x80);
        // current_address 0x108 aligns up to 0x180 => start at 0x180; interval ends at 0x1b0.
        assert_eq!(simple_allocator.alloc(0x30, 0x80).unwrap(), 0x180);
    }
}