devices/virtio/
virtio_pci_device.rs

1// Copyright 2018 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::any::Any;
6use std::collections::BTreeMap;
7use std::sync::Arc;
8
9#[cfg(target_arch = "x86_64")]
10use acpi_tables::sdt::SDT;
11use anyhow::anyhow;
12use anyhow::Context;
13use base::debug;
14use base::error;
15use base::trace;
16use base::AsRawDescriptor;
17use base::AsRawDescriptors;
18use base::Event;
19use base::Protection;
20use base::RawDescriptor;
21use base::Result;
22use base::SharedMemory;
23use base::Tube;
24use base::WorkerThread;
25use data_model::Le32;
26use hypervisor::Datamatch;
27use hypervisor::MemCacheType;
28use libc::ERANGE;
29#[cfg(target_arch = "x86_64")]
30use metrics::MetricEventType;
31use resources::AddressRange;
32use resources::Alloc;
33use resources::AllocOptions;
34use resources::SystemAllocator;
35use serde::Deserialize;
36use serde::Serialize;
37use snapshot::AnySnapshot;
38use sync::Mutex;
39use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
40use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
41use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
42use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
43use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
44use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
45use virtio_sys::virtio_config::VIRTIO_CONFIG_S_SUSPEND;
46use vm_control::api::VmMemoryClient;
47use vm_control::PciId;
48use vm_control::VmMemoryDestination;
49use vm_control::VmMemoryRegionId;
50use vm_control::VmMemorySource;
51use vm_memory::GuestMemory;
52use zerocopy::FromBytes;
53use zerocopy::Immutable;
54use zerocopy::IntoBytes;
55use zerocopy::KnownLayout;
56
57use self::virtio_pci_common_config::VirtioPciCommonConfig;
58use super::*;
59#[cfg(target_arch = "x86_64")]
60use crate::acpi::PmWakeupEvent;
61#[cfg(target_arch = "x86_64")]
62use crate::pci::pm::PciDevicePower;
63use crate::pci::pm::PciPmCap;
64use crate::pci::pm::PmConfig;
65use crate::pci::pm::PmStatusChange;
66use crate::pci::BarRange;
67use crate::pci::MsixCap;
68use crate::pci::MsixConfig;
69use crate::pci::MsixStatus;
70use crate::pci::PciAddress;
71use crate::pci::PciBarConfiguration;
72use crate::pci::PciBarIndex;
73use crate::pci::PciBarPrefetchable;
74use crate::pci::PciBarRegionType;
75use crate::pci::PciBaseSystemPeripheralSubclass;
76use crate::pci::PciCapability;
77use crate::pci::PciCapabilityID;
78use crate::pci::PciClassCode;
79use crate::pci::PciConfiguration;
80use crate::pci::PciDevice;
81use crate::pci::PciDeviceError;
82use crate::pci::PciDisplaySubclass;
83use crate::pci::PciHeaderType;
84use crate::pci::PciInputDeviceSubclass;
85use crate::pci::PciInterruptPin;
86use crate::pci::PciMassStorageSubclass;
87use crate::pci::PciMultimediaSubclass;
88use crate::pci::PciNetworkControllerSubclass;
89use crate::pci::PciSimpleCommunicationControllerSubclass;
90use crate::pci::PciSubclass;
91use crate::pci::PciWirelessControllerSubclass;
92use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
93#[cfg(feature = "pci-hotplug")]
94use crate::HotPluggable;
95use crate::IrqLevelEvent;
96use crate::Suspendable;
97
/// Virtio PCI capability configuration types (`cfg_type` values), matching the
/// `VIRTIO_PCI_CAP_*` constants from the virtio specification plus the
/// vhost-user extension values (6-8).
#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    // Doorbell, Notification and SharedMemory are Virtio Vhost User related PCI
    // capabilities. Specified in 5.7.7.4 here
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}
113
/// In-memory layout of a virtio vendor-specific PCI capability
/// (`virtio_pci_cap`), which directs the guest to one of the virtio
/// configuration structures located within a BAR.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8, // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8, // Generic PCI field: next ptr
    pub cap_len: u8,  // Generic PCI field: capability length
    pub cfg_type: u8, // Identifies the structure.
    pub bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    pub offset: Le32, // Offset within bar.
    pub length: Le32, // Length of the structure, in bytes.
}
129
130impl PciCapability for VirtioPciCap {
131    fn bytes(&self) -> &[u8] {
132        self.as_bytes()
133    }
134
135    fn id(&self) -> PciCapabilityID {
136        PciCapabilityID::VendorSpecific
137    }
138
139    fn writable_bits(&self) -> Vec<u32> {
140        vec![0u32; 4]
141    }
142}
143
144impl VirtioPciCap {
145    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
146        VirtioPciCap {
147            cap_vndr: 0,
148            cap_next: 0,
149            cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
150            cfg_type: cfg_type as u8,
151            bar,
152            id: 0,
153            padding: [0; 2],
154            offset: Le32::from(offset),
155            length: Le32::from(length),
156        }
157    }
158
159    pub fn set_cap_len(&mut self, cap_len: u8) {
160        self.cap_len = cap_len;
161    }
162}
163
/// `virtio_pci_notify_cap`: a `VirtioPciCap` extended with the multiplier the
/// guest uses to compute each queue's notification address within the BAR.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    notify_off_multiplier: Le32,
}
171
172impl PciCapability for VirtioPciNotifyCap {
173    fn bytes(&self) -> &[u8] {
174        self.as_bytes()
175    }
176
177    fn id(&self) -> PciCapabilityID {
178        PciCapabilityID::VendorSpecific
179    }
180
181    fn writable_bits(&self) -> Vec<u32> {
182        vec![0u32; 5]
183    }
184}
185
186impl VirtioPciNotifyCap {
187    pub fn new(
188        cfg_type: PciCapabilityType,
189        bar: u8,
190        offset: u32,
191        length: u32,
192        multiplier: Le32,
193    ) -> Self {
194        VirtioPciNotifyCap {
195            cap: VirtioPciCap {
196                cap_vndr: 0,
197                cap_next: 0,
198                cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
199                cfg_type: cfg_type as u8,
200                bar,
201                id: 0,
202                padding: [0; 2],
203                offset: Le32::from(offset),
204                length: Le32::from(length),
205            },
206            notify_off_multiplier: multiplier,
207        }
208    }
209}
210
/// `virtio_pci_shm_cap`: a `VirtioPciCap` extended with the high dwords of the
/// 64-bit offset and length of a shared memory region (the low dwords live in
/// the embedded `VirtioPciCap`).
#[repr(C)]
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most sig 32 bits of offset
    length_hi: Le32, // Most sig 32 bits of length
}
218
219impl PciCapability for VirtioPciShmCap {
220    fn bytes(&self) -> &[u8] {
221        self.as_bytes()
222    }
223
224    fn id(&self) -> PciCapabilityID {
225        PciCapabilityID::VendorSpecific
226    }
227
228    fn writable_bits(&self) -> Vec<u32> {
229        vec![0u32; 6]
230    }
231}
232
233impl VirtioPciShmCap {
234    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
235        VirtioPciShmCap {
236            cap: VirtioPciCap {
237                cap_vndr: 0,
238                cap_next: 0,
239                cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
240                cfg_type: cfg_type as u8,
241                bar,
242                id: shmid,
243                padding: [0; 2],
244                offset: Le32::from(offset as u32),
245                length: Le32::from(length as u32),
246            },
247            offset_hi: Le32::from((offset >> 32) as u32),
248            length_hi: Le32::from((length >> 32) as u32),
249        }
250    }
251}
252
// Allocate one bar for the structs pointed to by the capability structures.
// Layout within the capability BAR:
//   0x0000-0x0037 common config, 0x1000 ISR, 0x2000-0x2FFF device config,
//   0x3000-0x3FFF notifications, 0x4000-0x5FFF unused,
//   0x6000-0x6FFF MSI-X table, 0x7000-0x7FFF MSI-X PBA.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
const CAPABILITY_BAR_SIZE: u64 = 0x8000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

// Red Hat / virtio PCI vendor ID.
const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

// BAR index of the capability structures; BAR index of shared memory regions.
const CAPABILITIES_BAR_NUM: usize = 0;
const SHMEM_BAR_NUM: usize = 2;
282
/// A queue's notification event plus whether it is currently registered as an
/// ioevent with the hypervisor.
struct QueueEvent {
    event: Event,
    // Set once `event` has been registered at the queue's notification address
    // via the ioevent `VmMemoryClient`; cleared when unregistered.
    ioevent_registered: bool,
}
287
/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    // PCI configuration space registers (header, BARs, capabilities).
    config_regs: PciConfiguration,
    // Address requested by the inner device, if any.
    preferred_address: Option<PciAddress>,
    // Address assigned by `allocate_address`, once allocated.
    pci_address: Option<PciAddress>,

    // The virtio device backing this transport.
    device: Box<dyn VirtioDevice>,
    // True once `device.activate` has succeeded.
    device_activated: bool,
    // If true, the legacy INTx pin/line is not programmed in config space.
    disable_intx: bool,

    // Interrupt object handed to the device on activation.
    interrupt: Option<Interrupt>,
    // Level-triggered interrupt event assigned via `assign_irq`.
    interrupt_evt: Option<IrqLevelEvent>,
    // Worker spawned by the interrupt to service resample events.
    interrupt_resample_worker: Option<WorkerThread<()>>,

    // Per-queue configuration state exposed through the common config.
    queues: Vec<QueueConfig>,
    // Notification events, one per queue (parallel to `queues`).
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    // Index of the BAR holding the virtio capability structures.
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    // Virtio common configuration registers (driver status, queue select, ...).
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    // Tube to the main process, used for power-management wakeup events.
    vm_control_tube: Arc<Mutex<Tube>>,
}
326
/// Sleep-time state of the transport; present only while the device is asleep.
enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}
337
/// Serialized state of a `VirtioPciDevice` used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    config_regs: AnySnapshot,

    // Snapshot produced by the wrapped `VirtioDevice`.
    inner_device: AnySnapshot,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: AnySnapshot,
    common_config: VirtioPciCommonConfig,

    // Snapshots of all queue configs, in queue-index order.
    queues: Vec<AnySnapshot>,
    // (queue index, queue snapshot) pairs for activated queues, if the device
    // was activated when snapshotted.
    activated_queues: Option<Vec<(usize, AnySnapshot)>>,
}
352
impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    ///
    /// `msi_device_tube` backs the MSI-X configuration; `disable_intx`
    /// suppresses programming of the legacy interrupt pin in config space.
    /// `shared_memory_vm_memory_client` must be `Some` exactly when the device
    /// exposes a shared memory region, and `ioevent_vm_memory_client` is used
    /// to (un)register queue notification ioevents.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        // One notification event per queue; none are registered as ioevents yet.
        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        // Choose the advertised PCI class/subclass from the virtio device type.
        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Console => (
                PciClassCode::SimpleCommunicationController,
                &PciSimpleCommunicationControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Rng => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Balloon => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Scsi => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Scsi as &dyn PciSubclass,
            ),
            DeviceType::P9 => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Input => (
                PciClassCode::InputDevice,
                &PciInputDeviceSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Vsock => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Iommu => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Iommu as &dyn PciSubclass,
            ),
            DeviceType::Sound => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::AudioController as &dyn PciSubclass,
            ),
            DeviceType::Fs => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pmem => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::NonVolatileMemory as &dyn PciSubclass,
            ),
            DeviceType::Mac80211HwSim => (
                PciClassCode::WirelessController,
                &PciWirelessControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::VideoEncoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::VideoDecoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Media => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Scmi => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Wl => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Tpm => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pvclock => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per interrupt source plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            interrupt_resample_worker: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }

    /// Returns true once the driver has set ACKNOWLEDGE, DRIVER, DRIVER_OK and
    /// FEATURES_OK in the status register without also setting FAILED.
    fn is_driver_ready(&self) -> bool {
        let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
            | VIRTIO_CONFIG_S_DRIVER
            | VIRTIO_CONFIG_S_DRIVER_OK
            | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
        (self.common_config.driver_status & ready_bits) == ready_bits
            && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
    }

    /// Returns true if the driver has set the SUSPEND bit in the status register.
    fn is_device_suspended(&self) -> bool {
        (self.common_config.driver_status & VIRTIO_CONFIG_S_SUSPEND as u8) != 0
    }

    /// Determines if the driver has requested the device reset itself
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }

    /// Adds the virtio transport capabilities (common/ISR/device/notify/PCI
    /// config), the MSI-X capability, and the power-management capability to
    /// PCI config space, all pointing into `settings_bar`.
    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        //TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // MSI-X table and PBA both live in the settings BAR.
        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }

    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    fn activate(&mut self) -> anyhow::Result<()> {
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some((
                PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());
        self.interrupt_resample_worker = interrupt.spawn_resample_thread();

        // Notification addresses are laid out in the settings BAR, one
        // NOTIFY_OFF_MULTIPLIER-sized slot per queue index.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                // Register the queue event as an ioevent at its notification
                // address, unless a previous activation already did so.
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt, interrupt.clone())
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        // An activation failure is reported to the guest via NEEDS_RESET
        // rather than returned as an error from this function.
        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }

    /// Unregisters every queue notification ioevent that was previously
    /// registered with the hypervisor, clearing the per-queue flags.
    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }

    /// Returns a reference to the wrapped virtio device.
    pub fn virtio_device(&self) -> &dyn VirtioDevice {
        self.device.as_ref()
    }

    /// The PCI address assigned to this device, if one has been allocated.
    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }

    /// Tracks power-state transitions: the wakeup event is active exactly
    /// while the device is in D3.
    #[cfg(target_arch = "x86_64")]
    fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
        if let Some(interrupt) = self.interrupt.as_mut() {
            interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
        }
    }

    // No wakeup-event tracking on non-x86_64 targets.
    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
}
721impl PciDevice for VirtioPciDevice {
722    fn debug_label(&self) -> String {
723        format!("pci{}", self.device.debug_label())
724    }
725
    /// The PCI address requested by the inner device, if any.
    fn preferred_address(&self) -> Option<PciAddress> {
        self.preferred_address
    }
729
730    fn allocate_address(
731        &mut self,
732        resources: &mut SystemAllocator,
733    ) -> std::result::Result<PciAddress, PciDeviceError> {
734        if self.pci_address.is_none() {
735            if let Some(address) = self.preferred_address {
736                if !resources.reserve_pci(address, self.debug_label()) {
737                    return Err(PciDeviceError::PciAllocationFailed);
738                }
739                self.pci_address = Some(address);
740            } else {
741                self.pci_address = resources.allocate_pci(0, self.debug_label());
742            }
743            self.msix_config
744                .lock()
745                .set_pci_address(self.pci_address.unwrap());
746        }
747        self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
748    }
749
    /// Collects every raw descriptor that must stay open across sandboxing:
    /// the device's own descriptors, queue notification events, the INTx
    /// event, the MSI socket, iommu descriptors, and the API client/control
    /// tube descriptors.
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut rds = self.device.keep_rds();
        rds.extend(
            self.queue_evts
                .iter()
                .map(|qe| qe.event.as_raw_descriptor()),
        );
        if let Some(interrupt_evt) = &self.interrupt_evt {
            rds.extend(interrupt_evt.as_raw_descriptors());
        }
        let descriptor = self.msix_config.lock().get_msi_socket();
        rds.push(descriptor);
        if let Some(iommu) = &self.iommu {
            rds.append(&mut iommu.lock().as_raw_descriptors());
        }
        rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
        rds.push(self.vm_control_tube.lock().as_raw_descriptor());
        rds
    }
769
770    fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
771        self.interrupt_evt = Some(irq_evt);
772        if !self.disable_intx {
773            self.config_regs.set_irq(irq_num as u8, pin);
774        }
775    }
776
    /// Delegates capability-BAR allocation to the module-level
    /// `allocate_io_bars` helper, supplying a callback that allocates MMIO
    /// ranges from the system allocator.
    fn allocate_io_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_io_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{device_type}-cap_bar"),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }
796
    /// Delegates device-specific BAR allocation to the module-level
    /// `allocate_device_bars` helper, supplying a callback that allocates MMIO
    /// ranges from the system allocator.
    fn allocate_device_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_device_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{device_type}-custom_bar"),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }
816
817    fn destroy_device(&mut self) {
818        if let Err(e) = self.unregister_ioevents() {
819            error!("error destroying {}: {:?}", &self.debug_label(), &e);
820        }
821    }
822
    /// Looks up the configuration of BAR `bar_num`, if that BAR exists.
    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        self.config_regs.get_bar_configuration(bar_num)
    }
826
    /// Adds the device's own PCI capabilities to config space. If the device
    /// exposes a shared memory region, a shared-memory capability covering it
    /// (at offset 0 of the shmem BAR) is appended as well.
    fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut caps = self.device.get_device_caps();
        if let Some(region) = self.device.get_shared_memory_region() {
            caps.push(Box::new(VirtioPciShmCap::new(
                PciCapabilityType::SharedMemoryConfig,
                SHMEM_BAR_NUM as u8,
                0,
                region.length,
                region.id,
            )));
        }

        for cap in caps {
            self.config_regs
                .add_capability(&*cap, None)
                .map_err(PciDeviceError::CapabilitiesSetup)?;
        }

        Ok(())
    }
847
    /// Reads the 32-bit PCI config register at `reg_idx`.
    fn read_config_register(&self, reg_idx: usize) -> u32 {
        self.config_regs.read_reg(reg_idx)
    }
851
    /// Handles a guest write to PCI config space and forwards any resulting
    /// side effect to the device: MSI-X state changes go to `control_notify`,
    /// power-management transitions to `handle_pm_status_change`.
    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
        if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
            // write_reg returns a type-erased status; dispatch on its concrete
            // type via downcast.
            if let Some(msix_behavior) = <dyn Any>::downcast_ref::<MsixStatus>(&*res) {
                self.device.control_notify(*msix_behavior);
            } else if let Some(status) = <dyn Any>::downcast_ref::<PmStatusChange>(&*res) {
                self.handle_pm_status_change(status);
            }
        }
    }
861
862    fn setup_pci_config_mapping(
863        &mut self,
864        shmem: &SharedMemory,
865        base: usize,
866        len: usize,
867    ) -> std::result::Result<bool, PciDeviceError> {
868        self.config_regs
869            .setup_mapping(shmem, base, len)
870            .map(|_| true)
871            .map_err(PciDeviceError::MmioSetup)
872    }
873
    /// Handles an MMIO read from one of the device's BARs.
    ///
    /// Only the settings BAR is decoded here; `offset` selects which
    /// virtio-over-PCI structure (common config, ISR, device config,
    /// notification, MSI-X table/PBA) is being read. Reads at offsets outside
    /// the known ranges are silently ignored.
    fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
        if bar_index == self.settings_bar {
            match offset {
                // Virtio common configuration structure.
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    // Only the first byte of the buffer is populated.
                    if let Some(v) = data.get_mut(0) {
                        // Reading this register resets it to 0.
                        *v = if let Some(interrupt) = &self.interrupt {
                            interrupt.read_and_reset_interrupt_status()
                        } else {
                            0
                        };
                    }
                }
                // Device-specific configuration, delegated to the backend device.
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Handled with ioevents.
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    self.msix_config
                        .lock()
                        .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        }
    }
914
    /// Handles an MMIO write to one of the device's BARs.
    ///
    /// Besides decoding the settings-BAR structures, this is where
    /// driver-initiated state transitions are observed: device activation
    /// (once the driver status indicates readiness), suspend-state changes,
    /// and driver-requested device resets.
    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        // Capture suspend state before the write so a transition can be
        // detected afterwards.
        let was_suspended = self.is_device_suspended();

        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    // Writing 1 bits clears the corresponding ISR status bits.
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    // Table writes may change masking; the device is notified
                    // of the resulting behavior change.
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        }

        // Activate the device once the driver signals readiness.
        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        // Propagate suspend-state transitions to the interrupt object.
        let is_suspended = self.is_device_suspended();
        if is_suspended != was_suspended {
            if let Some(interrupt) = self.interrupt.as_mut() {
                interrupt.set_suspended(is_suspended);
            }
        }

        // Device has been reset by the driver
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                self.device_activated = false;
                // reset queues
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // select queue 0 by default
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
                if let Some(interrupt_resample_worker) = self.interrupt_resample_worker.take() {
                    interrupt_resample_worker.stop();
                }
            }
        }
    }
999
    /// Forwards the sandboxing notification to the inner virtio device.
    fn on_device_sandboxed(&mut self) {
        self.device.on_device_sandboxed();
    }
1003
    #[cfg(target_arch = "x86_64")]
    /// Lets the inner virtio device contribute ACPI SDT tables.
    ///
    /// Panics if no PCI address has been assigned to the device yet.
    fn generate_acpi(&mut self, sdts: &mut Vec<SDT>) -> anyhow::Result<()> {
        self.device.generate_acpi(
            self.pci_address.expect("pci_address must be assigned"),
            sdts,
        )
    }
1011
    /// Returns `self` as a `VirtioPciDevice` reference (always `Some` here),
    /// allowing callers holding a trait object to recover the concrete type.
    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        Some(self)
    }
1015}
1016
1017fn allocate_io_bars<F>(
1018    virtio_pci_device: &mut VirtioPciDevice,
1019    mut alloc_fn: F,
1020) -> std::result::Result<Vec<BarRange>, PciDeviceError>
1021where
1022    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
1023{
1024    let address = virtio_pci_device
1025        .pci_address
1026        .expect("allocate_address must be called prior to allocate_io_bars");
1027    // Allocate one bar for the structures pointed to by the capability structures.
1028    let settings_config_addr = alloc_fn(
1029        CAPABILITY_BAR_SIZE,
1030        Alloc::PciBar {
1031            bus: address.bus,
1032            dev: address.dev,
1033            func: address.func,
1034            bar: 0,
1035        },
1036        AllocOptions::new()
1037            .max_address(u32::MAX.into())
1038            .align(CAPABILITY_BAR_SIZE),
1039    )?;
1040    let config = PciBarConfiguration::new(
1041        CAPABILITIES_BAR_NUM,
1042        CAPABILITY_BAR_SIZE,
1043        PciBarRegionType::Memory32BitRegion,
1044        PciBarPrefetchable::NotPrefetchable,
1045    )
1046    .set_address(settings_config_addr);
1047    let settings_bar = virtio_pci_device
1048        .config_regs
1049        .add_pci_bar(config)
1050        .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
1051        as u8;
1052    // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
1053    virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;
1054
1055    Ok(vec![BarRange {
1056        addr: settings_config_addr,
1057        size: CAPABILITY_BAR_SIZE,
1058        prefetchable: false,
1059    }])
1060}
1061
1062fn allocate_device_bars<F>(
1063    virtio_pci_device: &mut VirtioPciDevice,
1064    mut alloc_fn: F,
1065) -> std::result::Result<Vec<BarRange>, PciDeviceError>
1066where
1067    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
1068{
1069    let address = virtio_pci_device
1070        .pci_address
1071        .expect("allocate_address must be called prior to allocate_device_bars");
1072
1073    let configs = virtio_pci_device.device.get_device_bars(address);
1074    let configs = if !configs.is_empty() {
1075        configs
1076    } else {
1077        let region = match virtio_pci_device.device.get_shared_memory_region() {
1078            None => return Ok(Vec::new()),
1079            Some(r) => r,
1080        };
1081        let config = PciBarConfiguration::new(
1082            SHMEM_BAR_NUM,
1083            region
1084                .length
1085                .checked_next_power_of_two()
1086                .expect("bar too large"),
1087            PciBarRegionType::Memory64BitRegion,
1088            PciBarPrefetchable::Prefetchable,
1089        );
1090
1091        let alloc = Alloc::PciBar {
1092            bus: address.bus,
1093            dev: address.dev,
1094            func: address.func,
1095            bar: config.bar_index() as u8,
1096        };
1097
1098        let vm_memory_client = virtio_pci_device
1099            .shared_memory_vm_memory_client
1100            .take()
1101            .expect("missing shared_memory_tube");
1102
1103        // See comment VmMemoryRequest::execute
1104        let can_prepare = !virtio_pci_device
1105            .device
1106            .expose_shmem_descriptors_with_viommu();
1107        let prepare_type = if can_prepare {
1108            virtio_pci_device.device.get_shared_memory_prepare_type()
1109        } else {
1110            SharedMemoryPrepareType::DynamicPerMapping
1111        };
1112
1113        let vm_requester = Box::new(VmRequester::new(vm_memory_client, alloc, prepare_type));
1114        virtio_pci_device
1115            .device
1116            .set_shared_memory_mapper(vm_requester);
1117
1118        vec![config]
1119    };
1120    let mut ranges = vec![];
1121    for config in configs {
1122        let device_addr = alloc_fn(
1123            config.size(),
1124            Alloc::PciBar {
1125                bus: address.bus,
1126                dev: address.dev,
1127                func: address.func,
1128                bar: config.bar_index() as u8,
1129            },
1130            AllocOptions::new()
1131                .prefetchable(config.is_prefetchable())
1132                .align(config.size()),
1133        )?;
1134        let config = config.set_address(device_addr);
1135        let _device_bar = virtio_pci_device
1136            .config_regs
1137            .add_pci_bar(config)
1138            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
1139        ranges.push(BarRange {
1140            addr: device_addr,
1141            size: config.size(),
1142            prefetchable: false,
1143        });
1144    }
1145
1146    if virtio_pci_device
1147        .device
1148        .get_shared_memory_region()
1149        .is_some()
1150    {
1151        let shmem_region = AddressRange::from_start_and_size(ranges[0].addr, ranges[0].size)
1152            .expect("invalid shmem region");
1153        virtio_pci_device
1154            .device
1155            .set_shared_memory_region(shmem_region);
1156    }
1157
1158    Ok(ranges)
1159}
1160
#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Sets PciAddress to pci_addr
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        // Record the address and keep the MSI-X config in sync with it.
        self.pci_address = Some(pci_addr);
        self.msix_config.lock().set_pci_address(pci_addr);
        Ok(())
    }

    /// Configures IO BAR layout without memory alloc.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // Placeholder addresses only; the guest reprograms the real BARs.
        let mut placeholder_allocator = SimpleAllocator::new(0);
        allocate_io_bars(self, |size, _alloc, _options| {
            placeholder_allocator.alloc(size, size)
        })?;
        Ok(())
    }

    /// Configure device BAR layout without memory alloc.
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // For device BAR, the space for CAPABILITY_BAR_SIZE should be skipped.
        let mut placeholder_allocator = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        allocate_device_bars(self, |size, _alloc, _options| {
            placeholder_allocator.alloc(size, size)
        })?;
        Ok(())
    }
}
1185
#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the layout of PCI BAR for hotplugged
/// devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by guest OS.
struct SimpleAllocator {
    /// Next candidate address; advanced (after alignment) by each `alloc`.
    current_address: u64,
}
1195
#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Constructs SimpleAllocator. Address will start at or after base_address.
    fn new(base_address: u64) -> Self {
        Self {
            current_address: base_address,
        }
    }

    /// Allocates `size` bytes aligned to `align` (no alignment when `align`
    /// is 0) and returns the start address. Never fails; the `Result` exists
    /// to match the allocation callback signature.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        let start_address = if align > 0 {
            // Round the cursor up to the requested alignment.
            self.current_address.next_multiple_of(align)
        } else {
            self.current_address
        };
        self.current_address = start_address + size;
        Ok(start_address)
    }
}
1216
impl Suspendable for VirtioPciDevice {
    /// Puts the virtio device to sleep, stashing its activated queues (if
    /// any) in `sleep_state` so they can be handed back on `wake`.
    fn sleep(&mut self) -> anyhow::Result<()> {
        // If the device is already asleep, we should not request it to sleep again.
        if self.sleep_state.is_some() {
            return Ok(());
        }

        if let Some(queues) = self.device.virtio_sleep()? {
            // Only an activated device may return queues.
            anyhow::ensure!(
                self.device_activated,
                format!(
                    "unactivated device {} returned queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Active {
                activated_queues: queues,
            });
        } else {
            // Conversely, an activated device must hand its queues back.
            anyhow::ensure!(
                !self.device_activated,
                format!(
                    "activated device {} didn't return queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Inactive);
        }
        Ok(())
    }

    /// Wakes the device, returning the queues captured by `sleep` (together
    /// with guest memory and the interrupt) when it was active.
    fn wake(&mut self) -> anyhow::Result<()> {
        match self.sleep_state.take() {
            None => {
                // If the device is already awake, we should not request it to wake again.
            }
            Some(SleepState::Inactive) => {
                self.device.virtio_wake(None).with_context(|| {
                    format!(
                        "virtio_wake failed for {}, can't recover",
                        self.debug_label(),
                    )
                })?;
            }
            Some(SleepState::Active { activated_queues }) => {
                self.device
                    .virtio_wake(Some((
                        self.mem.clone(),
                        self.interrupt
                            .clone()
                            .expect("interrupt missing for already active queues"),
                        activated_queues,
                    )))
                    .with_context(|| {
                        format!(
                            "virtio_wake failed for {}, can't recover",
                            self.debug_label(),
                        )
                    })?;
            }
        };
        Ok(())
    }

    /// Serializes the device's state. Must be called while asleep
    /// (`sleep_state` is `Some`); refuses to snapshot when an iommu is
    /// attached.
    fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        AnySnapshot::to_any(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    // Serialize each activated queue along with its index so
                    // restore can re-associate them.
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }

    /// Restores the device state serialized by `snapshot`. Only supported
    /// while the device is asleep and not yet activated.
    fn restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = AnySnapshot::from_any(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        // Restore the interrupt. This must be done after restoring the MSI-X configuration, but
        // before restoring the queues.
        if let Some(deser_interrupt) = deser.interrupt {
            let interrupt = Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some((
                    PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            );
            self.interrupt_resample_worker = interrupt.spawn_resample_thread();
            self.interrupt = Some(interrupt);
        }

        // The snapshot must match this device's queue layout.
        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            let interrupt = self
                .interrupt
                .as_ref()
                .context("tried to restore active queues without an interrupt")?;
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(
                        queue_config,
                        queue_snapshot,
                        &self.mem,
                        queue_evt,
                        interrupt.clone(),
                    )?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep.)
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        self.device.virtio_restore(deser.inner_device)?;

        Ok(())
    }
}
1454
/// Forwards shared-memory mapping requests from a virtio device to the VM via
/// a `VmMemoryClient`, tracking each mapping so it can later be removed.
struct VmRequester {
    /// Client used to register/unregister memory with the VM.
    vm_memory_client: VmMemoryClient,
    /// The PCI BAR allocation the mappings are placed in.
    alloc: Alloc,
    /// BAR-relative offset -> registered region id, for later removal.
    mappings: BTreeMap<u64, VmMemoryRegionId>,
    /// How/when the shared memory region should be prepared.
    prepare_type: SharedMemoryPrepareType,
    /// Whether the lazy prepare step has already been performed.
    prepared: bool,
}
1462
1463impl VmRequester {
1464    fn new(
1465        vm_memory_client: VmMemoryClient,
1466        alloc: Alloc,
1467        prepare_type: SharedMemoryPrepareType,
1468    ) -> Self {
1469        Self {
1470            vm_memory_client,
1471            alloc,
1472            mappings: BTreeMap::new(),
1473            prepare_type,
1474            prepared: false,
1475        }
1476    }
1477}
1478
impl SharedMemoryMapper for VmRequester {
    /// Maps `source` into the shared memory BAR at `offset`.
    ///
    /// On the first call, lazily prepares the shared memory region when the
    /// prepare type is `SingleMappingOnFirst`. Rejects `CacheNonCoherent`
    /// mappings when the region was prepared as `CacheCoherent`.
    fn add_mapping(
        &mut self,
        source: VmMemorySource,
        offset: u64,
        prot: Protection,
        cache: MemCacheType,
    ) -> anyhow::Result<()> {
        if !self.prepared {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(prepare_cache_type) =
                self.prepare_type
            {
                debug!(
                    "lazy prepare_shared_memory_region with {:?}",
                    prepare_cache_type
                );
                self.vm_memory_client
                    .prepare_shared_memory_region(self.alloc, prepare_cache_type)
                    .context("lazy prepare_shared_memory_region failed")?;
            }
            // Marked prepared for all prepare types so the check runs once.
            self.prepared = true;
        }

        // devices must implement VirtioDevice::get_shared_memory_prepare_type(), returning
        // SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheNonCoherent) in order to
        // add any mapping that requests MemCacheType::CacheNonCoherent.
        if cache == MemCacheType::CacheNonCoherent {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent) =
                self.prepare_type
            {
                error!("invalid request to map with CacheNonCoherent for device with prepared CacheCoherent memory");
                return Err(anyhow!("invalid MemCacheType"));
            }
        }

        let id = self
            .vm_memory_client
            .register_memory(
                source,
                VmMemoryDestination::ExistingAllocation {
                    allocation: self.alloc,
                    offset,
                },
                prot,
                cache,
            )
            .context("register_memory failed")?;

        // Remember the region id so remove_mapping can unregister it.
        self.mappings.insert(offset, id);
        Ok(())
    }

    /// Removes the mapping previously added at `offset`; errors if no mapping
    /// exists there.
    fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
        let id = self.mappings.remove(&offset).context("invalid offset")?;
        self.vm_memory_client
            .unregister_memory(id)
            .context("unregister_memory failed")
    }

    /// Exposes the underlying client's descriptor.
    fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
        Some(self.vm_memory_client.as_raw_descriptor())
    }
}
1542
#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        let mut allocator = super::SimpleAllocator::new(0);
        // 0 is already 0x80-aligned; interval occupies [0, 0x20).
        assert_eq!(allocator.alloc(0x20, 0x80).unwrap(), 0);
        // Cursor 0x20 rounds up to 0x40; interval occupies [0x40, 0x80).
        assert_eq!(allocator.alloc(0x40, 0x40).unwrap(), 0x40);
        // Cursor 0x80 is already aligned; interval occupies [0x80, 0x108).
        assert_eq!(allocator.alloc(0x88, 0x80).unwrap(), 0x80);
        // Cursor 0x108 rounds up to 0x180; interval occupies [0x180, 0x1b0).
        assert_eq!(allocator.alloc(0x30, 0x80).unwrap(), 0x180);
    }
}