devices/pci/
msix.rs

1// Copyright 2019 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::convert::TryInto;
6
7use anyhow::Context;
8use base::error;
9use base::info;
10use base::AsRawDescriptor;
11use base::Error as SysError;
12use base::Event;
13use base::RawDescriptor;
14use base::Tube;
15use base::TubeError;
16use bit_field::*;
17use remain::sorted;
18use serde::Deserialize;
19use serde::Serialize;
20use snapshot::AnySnapshot;
21use thiserror::Error;
22use vm_control::PciId;
23use vm_control::VmIrqRequest;
24use vm_control::VmIrqResponse;
25use zerocopy::FromBytes;
26use zerocopy::Immutable;
27use zerocopy::IntoBytes;
28use zerocopy::KnownLayout;
29
30use crate::pci::pci_configuration::PciCapConfig;
31use crate::pci::pci_configuration::PciCapConfigWriteResult;
32use crate::pci::PciCapability;
33use crate::pci::PciCapabilityID;
34
// Architectural maximum number of MSI-X vectors per function (Table Size is
// an N - 1 encoded 11-bit field, so at most 2048 vectors).
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Each MSI-X table entry is 16 bytes (4 DWORDs).
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// The PBA is accessed in 8-byte (QWORD) units.
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
// Number of pending bits packed into one PBA entry (u64).
pub const BITS_PER_PBA_ENTRY: usize = 64;
// Message Control word, bit 14: Function Mask.
const FUNCTION_MASK_BIT: u16 = 0x4000;
// Message Control word, bit 15: MSI-X Enable.
const MSIX_ENABLE_BIT: u16 = 0x8000;
// Vector Control DWORD, bit 0: per-vector Mask bit.
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;
42
/// One entry of the MSI-X table: message address/data plus vector control.
#[derive(Serialize, Deserialize, Clone, Default)]
struct MsixTableEntry {
    // Message Address, low 32 bits.
    msg_addr_lo: u32,
    // Message Upper Address, high 32 bits.
    msg_addr_hi: u32,
    // Message Data written during the MSI memory write transaction.
    msg_data: u32,
    // Vector Control; only bit 0 (Mask) is used here.
    vector_ctl: u32,
}
50
51impl MsixTableEntry {
52    fn masked(&self) -> bool {
53        self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT
54    }
55}
56
/// Pairing of the irqfd used to trigger an interrupt with its allocated GSI.
struct IrqfdGsi {
    // Event signaled to inject the interrupt into the guest.
    irqfd: Event,
    // Guest System Interrupt number allocated for this vector.
    gsi: u32,
}
61
/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    // One entry (address / data / vector control) per MSI-X vector.
    table_entries: Vec<MsixTableEntry>,
    // Pending Bit Array, one bit per vector packed into u64 words.
    pba_entries: Vec<u64>,
    // irqfd/GSI pair per vector; None until the vector has been enabled.
    irq_vec: Vec<Option<IrqfdGsi>>,
    // Function Mask bit of the Message Control word.
    masked: bool,
    // MSI-X Enable bit of the Message Control word.
    enabled: bool,
    // Tube used to send AllocateOneMsi/AddMsiRoute/ReleaseOneIrq requests.
    msi_device_socket: Tube,
    // Total number of MSI-X vectors.
    msix_num: u16,
    // Device id forwarded with MSI allocation requests.
    pci_id: u32,
    // Set via set_pci_address(); forwarded with AddMsiRoute on aarch64.
    pci_address: Option<resources::PciAddress>,
    // Human-readable name used in log messages and MSI requests.
    device_name: String,
}
75
/// Serializable image of [`MsixConfig`] used for snapshot/restore. irqfds are
/// not serializable, so only the GSI of each vector is recorded.
#[derive(Serialize, Deserialize)]
struct MsixConfigSnapshot {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    /// Just like MsixConfig::irq_vec, but only the GSI.
    irq_gsi_vec: Vec<Option<u32>>,
    masked: bool,
    enabled: bool,
    msix_num: u16,
    pci_id: u32,
    pci_address: Option<resources::PciAddress>,
    device_name: String,
}
89
90#[sorted]
91#[derive(Error, Debug)]
92pub enum MsixError {
93    #[error("AddMsiRoute failed: {0}")]
94    AddMsiRoute(SysError),
95    #[error("failed to receive AddMsiRoute response: {0}")]
96    AddMsiRouteRecv(TubeError),
97    #[error("failed to send AddMsiRoute request: {0}")]
98    AddMsiRouteSend(TubeError),
99    #[error("AllocateOneMsi failed: {0}")]
100    AllocateOneMsi(SysError),
101    #[error("failed to receive AllocateOneMsi response: {0}")]
102    AllocateOneMsiRecv(TubeError),
103    #[error("failed to send AllocateOneMsi request: {0}")]
104    AllocateOneMsiSend(TubeError),
105    #[error("failed to deserialize snapshot: {0}")]
106    DeserializationFailed(anyhow::Error),
107    #[error("invalid vector length in snapshot: {0}")]
108    InvalidVectorLength(std::num::TryFromIntError),
109    #[error("ReleaseOneIrq failed: {0}")]
110    ReleaseOneIrq(base::Error),
111    #[error("failed to receive ReleaseOneIrq response: {0}")]
112    ReleaseOneIrqRecv(TubeError),
113    #[error("failed to send ReleaseOneIrq request: {0}")]
114    ReleaseOneIrqSend(TubeError),
115}
116
// Shorthand result type for MSI-X operations in this module.
type MsixResult<T> = std::result::Result<T, MsixError>;
118
/// Outcome of a write to the MSI-X capability or table, reported to callers
/// so they can react to mask-state changes.
#[derive(Copy, Clone)]
pub enum MsixStatus {
    /// The function-wide mask state changed.
    Changed,
    /// The mask state of the single vector at this index changed.
    EntryChanged(usize),
    /// No mask-state change occurred.
    NothingToDo,
}
125
// Marker impl so MsixStatus can be returned from PciCapConfig::write_reg.
impl PciCapConfigWriteResult for MsixStatus {}
127
impl MsixConfig {
    /// Create a new `MsixConfig` with `msix_vectors` vectors.
    ///
    /// All vectors start with their per-vector Mask bit set, MSI-X disabled,
    /// and no irqfd/GSI allocated; allocation happens lazily when a vector
    /// becomes usable (see [`MsixConfig::msix_enable_one`]).
    pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);

        let mut table_entries: Vec<MsixTableEntry> = Vec::new();
        table_entries.resize_with(msix_vectors as usize, Default::default);
        // Every vector is masked initially.
        table_entries
            .iter_mut()
            .for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
        let mut pba_entries: Vec<u64> = Vec::new();
        // One u64 PBA word covers BITS_PER_PBA_ENTRY vectors; round up.
        let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY);
        pba_entries.resize_with(num_pba_entries, Default::default);

        let mut irq_vec = Vec::new();
        irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);

        MsixConfig {
            table_entries,
            pba_entries,
            irq_vec,
            masked: false,
            enabled: false,
            msi_device_socket: vm_socket,
            msix_num: msix_vectors,
            pci_id,
            pci_address: None,
            device_name,
        }
    }

    /// PCI address of the associated device.
    pub fn set_pci_address(&mut self, pci_address: resources::PciAddress) {
        self.pci_address = Some(pci_address);
    }

    /// Get the number of MSI-X vectors in this configuration.
    pub fn num_vectors(&self) -> u16 {
        self.msix_num
    }

    /// Check whether the Function Mask bit in Message Control word in set or not.
    /// if 1, all of the vectors associated with the function are masked,
    /// regardless of their per-vector Mask bit states.
    /// If 0, each vector's Mask bit determines whether the vector is masked or not.
    pub fn masked(&self) -> bool {
        self.masked
    }

    /// Check whether the Function Mask bit in MSIX table Message Control
    /// word in set or not.
    /// If true, the vector is masked.
    /// If false, the vector is unmasked.
    ///
    /// Out-of-range indices are reported as masked.
    pub fn table_masked(&self, index: usize) -> bool {
        if index >= self.table_entries.len() {
            true
        } else {
            self.table_entries[index].masked()
        }
    }

    /// Check whether the MSI-X Enable bit in Message Control word in set or not.
    /// if 1, the function is permitted to use MSI-X to request service.
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Read the MSI-X Capability Structure.
    /// The top 2 bits in Message Control word are emulated and all other
    /// bits are read only.
    pub fn read_msix_capability(&self, data: u32) -> u32 {
        let mut msg_ctl = (data >> 16) as u16;
        // Clear the emulated Enable/Function Mask bits, then reflect our state.
        msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT);

        if self.enabled {
            msg_ctl |= MSIX_ENABLE_BIT;
        }
        if self.masked {
            msg_ctl |= FUNCTION_MASK_BIT;
        }
        (msg_ctl as u32) << 16 | (data & u16::MAX as u32)
    }

    /// Write to the MSI-X Capability Structure.
    /// Only the top 2 bits in Message Control Word are writable.
    ///
    /// Returns `MsixStatus::Changed` when the function-wide mask state
    /// flipped, `NothingToDo` otherwise.
    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        // Only an aligned 16-bit write to the Message Control word (offset 2)
        // is meaningful here.
        if offset == 2 && data.len() == 2 {
            let reg = u16::from_le_bytes([data[0], data[1]]);
            let old_masked = self.masked;
            let old_enabled = self.enabled;

            self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT;
            self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT;

            // On a 0 -> 1 transition of Enable, try to wire up all vectors.
            if !old_enabled && self.enabled {
                if let Err(e) = self.msix_enable_all() {
                    error!("failed to enable MSI-X: {}", e);
                    self.enabled = false;
                }
            }

            // If the Function Mask bit was set, and has just been cleared, it's
            // important to go through the entire PBA to check if there was any
            // pending MSI-X message to inject, given that the vector is not
            // masked.
            if old_masked && !self.masked {
                for (index, entry) in self.table_entries.clone().iter().enumerate() {
                    if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                        self.inject_msix_and_clear_pba(index);
                    }
                }
                return MsixStatus::Changed;
            } else if !old_masked && self.masked {
                return MsixStatus::Changed;
            }
        } else {
            error!(
                "invalid write to MSI-X Capability Structure offset {:x}",
                offset
            );
        }
        MsixStatus::NothingToDo
    }

    /// Create a snapshot of the current MsixConfig struct for use in
    /// snapshotting.
    pub fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        AnySnapshot::to_any(MsixConfigSnapshot {
            table_entries: self.table_entries.clone(),
            pba_entries: self.pba_entries.clone(),
            masked: self.masked,
            enabled: self.enabled,
            msix_num: self.msix_num,
            pci_id: self.pci_id,
            pci_address: self.pci_address,
            device_name: self.device_name.clone(),
            // irqfds cannot be serialized; keep only the GSI per vector.
            irq_gsi_vec: self
                .irq_vec
                .iter()
                .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi))
                .collect(),
        })
        .context("failed to serialize MsixConfigSnapshot")
    }

    /// Restore a MsixConfig struct based on a snapshot. In short, this will
    /// restore all data exposed via MMIO, and recreate all MSI-X vectors (they
    /// will be re-wired to the irq chip).
    pub fn restore(&mut self, snapshot: AnySnapshot) -> MsixResult<()> {
        let snapshot: MsixConfigSnapshot =
            AnySnapshot::from_any(snapshot).map_err(MsixError::DeserializationFailed)?;

        self.table_entries = snapshot.table_entries;
        self.pba_entries = snapshot.pba_entries;
        self.masked = snapshot.masked;
        self.enabled = snapshot.enabled;
        self.msix_num = snapshot.msix_num;
        self.pci_id = snapshot.pci_id;
        self.pci_address = snapshot.pci_address;
        self.device_name = snapshot.device_name;

        // Drop any pre-existing registrations before re-allocating at the
        // snapshotted GSIs.
        self.msix_release_all()?;
        self.irq_vec
            .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>);
        for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() {
            if let Some(gsi_num) = gsi {
                self.msix_restore_one(vector, *gsi_num)?;
            } else {
                info!(
                    "skipping restore of vector {} for device {}",
                    vector, self.device_name
                );
            }
        }
        Ok(())
    }

    /// Restore the specified MSI-X vector.
    ///
    /// Note: we skip the checks from [MsixConfig::msix_enable_one] because for
    /// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must
    /// have passed those checks.
    fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> {
        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
        let request = VmIrqRequest::AllocateOneMsiAtGsi {
            irqfd,
            gsi,
            device_id: PciId::from(self.pci_id).into(),
            queue_id: index,
            device_name: self.device_name.clone(),
        };
        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::AllocateOneMsiSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AllocateOneMsiRecv)?
        {
            return Err(MsixError::AllocateOneMsi(e));
        };

        // Recover the irqfd moved into the request above; no other request
        // variant is possible here.
        self.irq_vec[index] = Some(IrqfdGsi {
            irqfd: match request {
                VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd,
                _ => unreachable!(),
            },
            gsi,
        });
        self.add_msi_route(index as u16, gsi)?;
        Ok(())
    }

    /// On warm restore, there could already be MSIs registered. We need to
    /// release them in case the routing has changed (e.g. different
    /// data <-> GSI).
    fn msix_release_all(&mut self) -> MsixResult<()> {
        for irqfd_gsi in self.irq_vec.drain(..).flatten() {
            let request = VmIrqRequest::ReleaseOneIrq {
                gsi: irqfd_gsi.gsi,
                irqfd: irqfd_gsi.irqfd,
            };

            self.msi_device_socket
                .send(&request)
                .map_err(MsixError::ReleaseOneIrqSend)?;
            if let VmIrqResponse::Err(e) = self
                .msi_device_socket
                .recv()
                .map_err(MsixError::ReleaseOneIrqRecv)?
            {
                return Err(MsixError::ReleaseOneIrq(e));
            }
        }
        Ok(())
    }

    /// Read the vector's message address/data back out of the MSI-X table and
    /// ask the VM to program the MSI route for `gsi`. A zero message address
    /// means the guest has not configured the entry yet, so no route is added.
    fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> {
        // Bytes 0..8 of the entry: Message Address (low + upper DWORDs).
        let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
        self.read_msix_table((index * 16).into(), data.as_mut());
        let msi_address: u64 = u64::from_le_bytes(data);
        // Bytes 8..12 of the entry: Message Data.
        let mut data: [u8; 4] = [0, 0, 0, 0];
        self.read_msix_table((index * 16 + 8).into(), data.as_mut());
        let msi_data: u32 = u32::from_le_bytes(data);

        if msi_address == 0 {
            return Ok(());
        }

        // Only used on aarch64, but make sure it is initialized correctly on all archs for better
        // test coverage.
        #[allow(unused_variables)]
        let pci_address = self
            .pci_address
            .expect("MsixConfig: must call set_pci_address before config writes");

        self.msi_device_socket
            .send(&VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
                #[cfg(target_arch = "aarch64")]
                pci_address,
            })
            .map_err(MsixError::AddMsiRouteSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AddMsiRouteRecv)?
        {
            return Err(MsixError::AddMsiRoute(e));
        }
        Ok(())
    }

    // Enable MSI-X: attempt to allocate an irqfd/GSI for every vector.
    fn msix_enable_all(&mut self) -> MsixResult<()> {
        for index in 0..self.irq_vec.len() {
            self.msix_enable_one(index)?;
        }
        Ok(())
    }

    // Use a new MSI-X vector
    // Create a new eventfd and bind them to a new msi
    fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> {
        // Skip vectors that are already wired up or not currently usable
        // (function disabled, function masked, or per-vector masked).
        if self.irq_vec[index].is_some()
            || !self.enabled()
            || self.masked()
            || self.table_masked(index)
        {
            return Ok(());
        }
        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
        let request = VmIrqRequest::AllocateOneMsi {
            irqfd,
            device_id: vm_control::PciId::from(self.pci_id).into(),
            queue_id: index,
            device_name: self.device_name.clone(),
        };
        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::AllocateOneMsiSend)?;
        let irq_num: u32 = match self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AllocateOneMsiRecv)?
        {
            VmIrqResponse::AllocateOneMsi { gsi } => gsi,
            VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)),
            _ => unreachable!(),
        };
        // Recover the irqfd that was moved into the request above.
        self.irq_vec[index] = Some(IrqfdGsi {
            irqfd: match request {
                VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd,
                _ => unreachable!(),
            },
            gsi: irq_num,
        });

        self.add_msi_route(index as u16, irq_num)?;
        Ok(())
    }

    /// Read MSI-X table
    ///  # Arguments
    ///  * 'offset' - the offset within the MSI-X Table
    ///  * 'data' - used to store the read results
    ///
    /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full
    /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined.
    ///
    ///   location: DWORD3            DWORD2      DWORD1            DWORD0
    ///   entry 0:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
    ///   entry 1:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
    ///   entry 2:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
    ///   ...
    pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) {
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            error!("invalid MSI-X table index {}", index);
            return;
        }

        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                let value = match modulo_offset {
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        };
    }

    /// Write to MSI-X table
    ///
    /// Message Address: the contents of this field specifies the address
    ///     for the memory write transaction; different MSI-X vectors have
    ///     different Message Address values
    /// Message Data: the contents of this field specifies the data driven
    ///     on AD\[31::00\] during the memory write transaction's data phase.
    /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit
    ///     is set, the function is prohibited from sending a message using
    ///     this MSI-X Table entry.
    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            error!("invalid MSI-X table index {}", index);
            return MsixStatus::NothingToDo;
        }

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                let value = u32::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => self.table_entries[index].vector_ctl = value,
                    _ => error!("invalid offset"),
                };
            }
            8 => {
                let value = u64::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };
            }
            _ => error!("invalid data length"),
        };

        let new_entry = self.table_entries[index].clone();

        // This MSI-X vector is enabled for the first time.
        if self.enabled()
            && !self.masked()
            && self.irq_vec[index].is_none()
            && old_entry.masked()
            && !new_entry.masked()
        {
            if let Err(e) = self.msix_enable_one(index) {
                error!("failed to enable MSI-X vector {}: {}", index, e);
                // Re-mask the vector so state stays consistent with the
                // failed enable.
                self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT;
            }
            return MsixStatus::EntryChanged(index);
        }

        // The address or data of an already-wired vector changed: reprogram
        // its MSI route.
        if self.enabled()
            && (old_entry.msg_addr_lo != new_entry.msg_addr_lo
                || old_entry.msg_addr_hi != new_entry.msg_addr_hi
                || old_entry.msg_data != new_entry.msg_data)
        {
            if let Some(irqfd_gsi) = &self.irq_vec[index] {
                let irq_num = irqfd_gsi.gsi;
                if let Err(e) = self.add_msi_route(index as u16, irq_num) {
                    error!("add_msi_route failed: {}", e);
                }
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked() {
            if old_entry.masked() && !self.table_entries[index].masked() {
                if self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
                return MsixStatus::EntryChanged(index);
            } else if !old_entry.masked() && self.table_entries[index].masked() {
                return MsixStatus::EntryChanged(index);
            }
        }
        MsixStatus::NothingToDo
    }

    /// Read PBA Entries
    ///  # Arguments
    ///  * 'offset' - the offset within the PBA entries
    ///  * 'data' - used to store the read results
    ///
    /// Pending Bits\[63::00\]: For each Pending Bit that is set, the function
    /// has a pending message for the associated MSI-X Table entry.
    pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) {
        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        if index >= self.pba_entries.len() {
            error!("invalid PBA index {}", index);
            return;
        }

        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        }
    }

    /// Write to PBA Entries
    ///
    /// Software should never write, and should only read Pending Bits.
    /// If software writes to Pending Bits, the result is undefined.
    pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }

    /// Set (`set == true`) or clear the PBA pending bit for `vector`.
    fn set_pba_bit(&mut self, vector: u16, set: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        let mut mask: u64 = (1 << shift) as u64;

        if set {
            self.pba_entries[index] |= mask;
        } else {
            mask = !mask;
            self.pba_entries[index] &= mask;
        }
    }

    /// Return the PBA pending bit for `vector` (0 or 1).
    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;

        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }

    /// Signal the vector's irqfd (if one is allocated) and clear its PBA
    /// pending bit.
    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        if let Some(irq) = &self.irq_vec[vector] {
            irq.irqfd.signal().unwrap();
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, false);
    }

    /// Inject virtual interrupt to the guest
    ///
    ///  # Arguments
    ///  * 'vector' - the index to the MSI-X Table entry
    ///
    /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is
    /// prohibited from sending the associated message, and the function
    /// must set the associated Pending bit whenever the function would
    /// otherwise send the message. When software unmasks a vector whose
    /// associated Pending bit is set, the function must schedule sending
    /// the associated message, and clear the Pending bit as soon as the
    /// message has been sent.
    ///
    /// If the vector is unmasked, writing to irqfd which wakes up KVM to
    /// inject virtual interrupt to the guest.
    pub fn trigger(&mut self, vector: u16) {
        if self.table_entries[vector as usize].masked() || self.masked() {
            self.set_pba_bit(vector, true);
        } else if let Some(irq) = self.irq_vec.get(vector as usize).unwrap_or(&None) {
            irq.irqfd.signal().unwrap();
        }
    }

    /// Return the raw descriptor of the MSI device socket
    pub fn get_msi_socket(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }

    /// Return irqfd of MSI-X Table entry
    ///
    ///  # Arguments
    ///  * 'vector' - the index to the MSI-X table entry
    pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
        match self.irq_vec.get(vector).unwrap_or(&None) {
            Some(irq) => Some(&irq.irqfd),
            None => None,
        }
    }

    /// Release every allocated GSI back to the VM, best-effort: send errors
    /// are skipped and responses are ignored.
    pub fn destroy(&mut self) {
        while let Some(irq) = self.irq_vec.pop() {
            if let Some(irq) = irq {
                let request = VmIrqRequest::ReleaseOneIrq {
                    gsi: irq.gsi,
                    irqfd: irq.irqfd,
                };
                if self.msi_device_socket.send(&request).is_err() {
                    continue;
                }
                let _ = self.msi_device_socket.recv::<VmIrqResponse>();
            }
        }
    }
}
750
// Only register 0's top two bits (MSI-X Enable, bit 31, and Function Mask,
// bit 30 — i.e. msg_ctl bits 15:14) are read dynamically from MsixConfig.
const MSIX_CONFIG_READ_MASK: [u32; 3] = [0xc000_0000, 0, 0];
752
753impl PciCapConfig for MsixConfig {
754    fn read_mask(&self) -> &'static [u32] {
755        &MSIX_CONFIG_READ_MASK
756    }
757
758    fn read_reg(&self, reg_idx: usize) -> u32 {
759        if reg_idx == 0 {
760            self.read_msix_capability(0)
761        } else {
762            0
763        }
764    }
765
766    fn write_reg(
767        &mut self,
768        reg_idx: usize,
769        offset: u64,
770        data: &[u8],
771    ) -> Option<Box<dyn PciCapConfigWriteResult>> {
772        let status = if reg_idx == 0 {
773            self.write_msix_capability(offset, data)
774        } else {
775            MsixStatus::NothingToDo
776        };
777        Some(Box::new(status))
778    }
779}
780
781impl AsRawDescriptor for MsixConfig {
782    fn as_raw_descriptor(&self) -> RawDescriptor {
783        self.msi_device_socket.as_raw_descriptor()
784    }
785}
786
787/// Message Control Register
788//   10-0:  MSI-X Table size
789//   13-11: Reserved
790//   14:    Mask. Mask all MSI-X when set.
791//   15:    Enable. Enable all MSI-X when set.
792// See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
793#[bitfield]
794#[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
795pub struct MsixCtrl {
796    table_size: B10,
797    reserved: B4,
798    mask: B1,
799    enable: B1,
800}
801
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
/// MSI-X Capability Structure
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}
821
822impl PciCapability for MsixCap {
823    fn bytes(&self) -> &[u8] {
824        self.as_bytes()
825    }
826
827    fn id(&self) -> PciCapabilityID {
828        PciCapabilityID::Msix
829    }
830
831    fn writable_bits(&self) -> Vec<u32> {
832        // Only msg_ctl[15:14] is writable
833        vec![0x3000_0000, 0, 0]
834    }
835}
836
837impl MsixCap {
838    pub fn new(
839        table_pci_bar: u8,
840        table_size: u16,
841        table_off: u32,
842        pba_pci_bar: u8,
843        pba_off: u32,
844    ) -> Self {
845        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
846
847        // Set the table size and enable MSI-X.
848        let mut msg_ctl = MsixCtrl::new();
849        msg_ctl.set_enable(1);
850        // Table Size is N - 1 encoded.
851        msg_ctl.set_table_size(table_size - 1);
852
853        MsixCap {
854            _cap_vndr: 0,
855            _cap_next: 0,
856            msg_ctl,
857            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
858            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
859        }
860    }
861}
862
#[cfg(test)]
mod tests {

    use std::thread;

    use super::*;

    /// Receives an `AllocateOneMsiAtGsi` request from the config under test
    /// and returns the requested GSI. Panics on any other message.
    #[track_caller]
    fn recv_allocate_msi(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
            msg => panic!("unexpected irqchip message: {msg:?}"),
        }
    }

    /// The fields of a `VmIrqRequest::AddMsiRoute`, extracted into a plain
    /// struct so tests can compare whole routes with `assert_eq!`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    struct MsiRouteDetails {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
        #[cfg(target_arch = "aarch64")]
        pci_address: resources::PciAddress,
    }

    const TEST_PCI_ADDRESS: resources::PciAddress = resources::PciAddress {
        bus: 1,
        dev: 2,
        func: 3,
    };

    /// Receives an `AddMsiRoute` request and returns its details. Panics on
    /// any other message.
    #[track_caller]
    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
                #[cfg(target_arch = "aarch64")]
                pci_address,
            } => MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
                #[cfg(target_arch = "aarch64")]
                pci_address,
            },
            msg => panic!("unexpected irqchip message: {msg:?}"),
        }
    }

    /// Receives a `ReleaseOneIrq` request and returns the released GSI.
    /// Panics on any other message.
    #[track_caller]
    fn recv_release_one_irq(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
            msg => panic!("unexpected irqchip message: {msg:?}"),
        }
    }

    #[track_caller]
    fn send_ok(t: &Tube) {
        t.send(&VmIrqResponse::Ok).unwrap();
    }

    /// Populates `cfg` with two MSI-X vectors (0 and 1) hooked up to GSIs 10
    /// and 20 respectively. Vector data is 0xdVEC_NUM, address is 0xaVEC_NUM.
    fn setup_two_vectors(cfg: &mut MsixConfig) {
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];
    }

    /// Services the allocate + add-route handshake that `restore` performs
    /// for one vector, asserting the expected GSI, address, and data.
    #[track_caller]
    fn expect_vector_restored(t: &Tube, gsi: u32, msi_address: u64, msi_data: u32) {
        assert_eq!(recv_allocate_msi(t), gsi);
        send_ok(t);
        assert_eq!(
            recv_add_msi_route(t),
            MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
                #[cfg(target_arch = "aarch64")]
                pci_address: TEST_PCI_ADDRESS,
            }
        );
        send_ok(t);
    }

    /// Tests a cold restore where there are no existing vectors at the time
    /// restore is called.
    #[test]
    fn verify_msix_restore_cold_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
        let (_unused, unused_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());
        cfg.set_pci_address(TEST_PCI_ADDRESS);
        setup_two_vectors(&mut cfg);

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests.
        let irqchip_fake = thread::spawn(move || {
            expect_vector_restored(&irqchip_tube, 10, 0xa0, 0xd0);
            expect_vector_restored(&irqchip_tube, 20, 0xa1, 0xd1);
            irqchip_tube
        });

        // Restore into a config built with different parameters; the restore
        // must overwrite them with the snapshotted values.
        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
        restored_cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(restored_cfg.pci_id, 0);
        assert_eq!(restored_cfg.device_name, "test_device");
    }

    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());
        cfg.set_pci_address(TEST_PCI_ADDRESS);
        setup_two_vectors(&mut cfg);

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests.
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            expect_vector_restored(&irqchip_tube, 10, 0xa0, 0xd0);
            expect_vector_restored(&irqchip_tube, 20, 0xa1, 0xd1);
            irqchip_tube
        });

        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }
}