devices/pci/msix.rs

1// Copyright 2019 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::convert::TryInto;
6
7use anyhow::Context;
8use base::error;
9use base::info;
10use base::AsRawDescriptor;
11use base::Error as SysError;
12use base::Event;
13use base::RawDescriptor;
14use base::Tube;
15use base::TubeError;
16use bit_field::*;
17use remain::sorted;
18use serde::Deserialize;
19use serde::Serialize;
20use snapshot::AnySnapshot;
21use thiserror::Error;
22use vm_control::VmIrqRequest;
23use vm_control::VmIrqResponse;
24use zerocopy::FromBytes;
25use zerocopy::Immutable;
26use zerocopy::IntoBytes;
27use zerocopy::KnownLayout;
28
29use crate::pci::pci_configuration::PciCapConfig;
30use crate::pci::pci_configuration::PciCapConfigWriteResult;
31use crate::pci::PciCapability;
32use crate::pci::PciCapabilityID;
33
// Per the PCI spec, an MSI-X function supports at most 2048 vectors
// (the Table Size field is N - 1 encoded).
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Size in bytes of one MSI-X table entry (4 DWORDs).
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// Size in bytes of one PBA word (a u64 of pending bits).
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
// Number of pending bits packed into each PBA entry.
pub const BITS_PER_PBA_ENTRY: usize = 64;
// Message Control word bits (the upper 16 bits of the first capability DWORD).
const FUNCTION_MASK_BIT: u16 = 0x4000;
const MSIX_ENABLE_BIT: u16 = 0x8000;
// Bit 0 of an entry's Vector Control DWORD: the per-vector Mask bit.
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;
41
/// One MSI-X table entry: message address/data plus vector control.
#[derive(Serialize, Deserialize, Clone, Default)]
struct MsixTableEntry {
    // Low 32 bits of the message address.
    msg_addr_lo: u32,
    // High 32 bits of the message address.
    msg_addr_hi: u32,
    // Data written when the vector fires.
    msg_data: u32,
    // Vector control; bit 0 is the per-vector Mask bit.
    vector_ctl: u32,
}
49
50impl MsixTableEntry {
51    fn masked(&self) -> bool {
52        self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT
53    }
54}
55
/// An allocated MSI: the event used to inject it paired with the GSI the
/// irqchip assigned to it.
struct IrqfdGsi {
    // Event signalled to deliver the interrupt.
    irqfd: Event,
    // Global system interrupt number returned by the irqchip.
    gsi: u32,
}
60
/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    // One entry (address/data/vector control) per vector.
    table_entries: Vec<MsixTableEntry>,
    // Pending Bit Array; BITS_PER_PBA_ENTRY pending bits packed per word.
    pba_entries: Vec<u64>,
    // Per-vector irqfd/GSI pair; None until the vector has been enabled.
    irq_vec: Vec<Option<IrqfdGsi>>,
    // Function Mask bit of the Message Control word.
    masked: bool,
    // MSI-X Enable bit of the Message Control word.
    enabled: bool,
    // Tube used to send VmIrqRequests (allocate/release/route MSIs).
    msi_device_socket: Tube,
    // Number of vectors (also the length of table_entries).
    msix_num: u16,
    // Device ID passed along with irqchip requests.
    pci_id: u32,
    // Set via set_pci_address(); required before MSI routes are added.
    pci_address: Option<resources::PciAddress>,
    // Device name used in irqchip requests and log messages.
    device_name: String,
}
74
/// Serializable image of an [`MsixConfig`] used for snapshot/restore.
/// Captures everything except the live irqfds and the VM socket, which are
/// re-created on restore.
#[derive(Serialize, Deserialize)]
struct MsixConfigSnapshot {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    /// Just like MsixConfig::irq_vec, but only the GSI.
    irq_gsi_vec: Vec<Option<u32>>,
    masked: bool,
    enabled: bool,
    msix_num: u16,
    pci_id: u32,
    pci_address: Option<resources::PciAddress>,
    device_name: String,
}
88
89#[sorted]
90#[derive(Error, Debug)]
91pub enum MsixError {
92    #[error("AddMsiRoute failed: {0}")]
93    AddMsiRoute(SysError),
94    #[error("failed to receive AddMsiRoute response: {0}")]
95    AddMsiRouteRecv(TubeError),
96    #[error("failed to send AddMsiRoute request: {0}")]
97    AddMsiRouteSend(TubeError),
98    #[error("AllocateOneMsi failed: {0}")]
99    AllocateOneMsi(SysError),
100    #[error("failed to receive AllocateOneMsi response: {0}")]
101    AllocateOneMsiRecv(TubeError),
102    #[error("failed to send AllocateOneMsi request: {0}")]
103    AllocateOneMsiSend(TubeError),
104    #[error("failed to deserialize snapshot: {0}")]
105    DeserializationFailed(anyhow::Error),
106    #[error("invalid vector length in snapshot: {0}")]
107    InvalidVectorLength(std::num::TryFromIntError),
108    #[error("ReleaseOneIrq failed: {0}")]
109    ReleaseOneIrq(base::Error),
110    #[error("failed to receive ReleaseOneIrq response: {0}")]
111    ReleaseOneIrqRecv(TubeError),
112    #[error("failed to send ReleaseOneIrq request: {0}")]
113    ReleaseOneIrqSend(TubeError),
114}
115
/// Result type for MSI-X configuration operations.
type MsixResult<T> = std::result::Result<T, MsixError>;

/// Outcome of a write to the MSI-X capability or table, returned so the
/// caller can react to mask/unmask transitions.
#[derive(Copy, Clone)]
pub enum MsixStatus {
    /// The global Function Mask or MSI-X Enable state changed.
    Changed,
    /// The per-vector mask state of the given table entry changed.
    EntryChanged(usize),
    /// The write did not change any mask/enable state.
    NothingToDo,
}

impl PciCapConfigWriteResult for MsixStatus {}
126
127impl MsixConfig {
128    pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
129        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
130
131        let mut table_entries: Vec<MsixTableEntry> = Vec::new();
132        table_entries.resize_with(msix_vectors as usize, Default::default);
133        table_entries
134            .iter_mut()
135            .for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
136        let mut pba_entries: Vec<u64> = Vec::new();
137        let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY);
138        pba_entries.resize_with(num_pba_entries, Default::default);
139
140        let mut irq_vec = Vec::new();
141        irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);
142
143        MsixConfig {
144            table_entries,
145            pba_entries,
146            irq_vec,
147            masked: false,
148            enabled: false,
149            msi_device_socket: vm_socket,
150            msix_num: msix_vectors,
151            pci_id,
152            pci_address: None,
153            device_name,
154        }
155    }
156
    /// Sets the PCI address of the associated device.
    ///
    /// Must be called before MSI routes are added: `add_msi_route` expects it
    /// on all architectures (and sends it to the irqchip on aarch64).
    pub fn set_pci_address(&mut self, pci_address: resources::PciAddress) {
        self.pci_address = Some(pci_address);
    }
161
    /// Get the number of MSI-X vectors in this configuration.
    pub fn num_vectors(&self) -> u16 {
        self.msix_num
    }
166
    /// Check whether the Function Mask bit in the Message Control word is set.
    /// If 1, all of the vectors associated with the function are masked,
    /// regardless of their per-vector Mask bit states.
    /// If 0, each vector's Mask bit determines whether the vector is masked or not.
    pub fn masked(&self) -> bool {
        self.masked
    }
174
175    /// Check whether the Function Mask bit in MSIX table Message Control
176    /// word in set or not.
177    /// If true, the vector is masked.
178    /// If false, the vector is unmasked.
179    pub fn table_masked(&self, index: usize) -> bool {
180        if index >= self.table_entries.len() {
181            true
182        } else {
183            self.table_entries[index].masked()
184        }
185    }
186
    /// Check whether the MSI-X Enable bit in the Message Control word is set.
    /// If 1, the function is permitted to use MSI-X to request service.
    pub fn enabled(&self) -> bool {
        self.enabled
    }
192
193    /// Read the MSI-X Capability Structure.
194    /// The top 2 bits in Message Control word are emulated and all other
195    /// bits are read only.
196    pub fn read_msix_capability(&self, data: u32) -> u32 {
197        let mut msg_ctl = (data >> 16) as u16;
198        msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT);
199
200        if self.enabled {
201            msg_ctl |= MSIX_ENABLE_BIT;
202        }
203        if self.masked {
204            msg_ctl |= FUNCTION_MASK_BIT;
205        }
206        (msg_ctl as u32) << 16 | (data & u16::MAX as u32)
207    }
208
209    /// Write to the MSI-X Capability Structure.
210    /// Only the top 2 bits in Message Control Word are writable.
211    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
212        if offset == 2 && data.len() == 2 {
213            let reg = u16::from_le_bytes([data[0], data[1]]);
214            let old_masked = self.masked;
215            let old_enabled = self.enabled;
216
217            self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT;
218            self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT;
219
220            if !old_enabled && self.enabled {
221                if let Err(e) = self.msix_enable_all() {
222                    error!("failed to enable MSI-X: {}", e);
223                    self.enabled = false;
224                }
225            }
226
227            // If the Function Mask bit was set, and has just been cleared, it's
228            // important to go through the entire PBA to check if there was any
229            // pending MSI-X message to inject, given that the vector is not
230            // masked.
231            if old_masked && !self.masked {
232                for (index, entry) in self.table_entries.clone().iter().enumerate() {
233                    if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
234                        self.inject_msix_and_clear_pba(index);
235                    }
236                }
237                return MsixStatus::Changed;
238            } else if !old_masked && self.masked {
239                return MsixStatus::Changed;
240            }
241        } else {
242            error!(
243                "invalid write to MSI-X Capability Structure offset {:x}",
244                offset
245            );
246        }
247        MsixStatus::NothingToDo
248    }
249
250    /// Create a snapshot of the current MsixConfig struct for use in
251    /// snapshotting.
252    pub fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
253        AnySnapshot::to_any(MsixConfigSnapshot {
254            table_entries: self.table_entries.clone(),
255            pba_entries: self.pba_entries.clone(),
256            masked: self.masked,
257            enabled: self.enabled,
258            msix_num: self.msix_num,
259            pci_id: self.pci_id,
260            pci_address: self.pci_address,
261            device_name: self.device_name.clone(),
262            irq_gsi_vec: self
263                .irq_vec
264                .iter()
265                .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi))
266                .collect(),
267        })
268        .context("failed to serialize MsixConfigSnapshot")
269    }
270
271    /// Restore a MsixConfig struct based on a snapshot. In short, this will
272    /// restore all data exposed via MMIO, and recreate all MSI-X vectors (they
273    /// will be re-wired to the irq chip).
274    pub fn restore(&mut self, snapshot: AnySnapshot) -> MsixResult<()> {
275        let snapshot: MsixConfigSnapshot =
276            AnySnapshot::from_any(snapshot).map_err(MsixError::DeserializationFailed)?;
277
278        self.table_entries = snapshot.table_entries;
279        self.pba_entries = snapshot.pba_entries;
280        self.masked = snapshot.masked;
281        self.enabled = snapshot.enabled;
282        self.msix_num = snapshot.msix_num;
283        self.pci_id = snapshot.pci_id;
284        self.pci_address = snapshot.pci_address;
285        self.device_name = snapshot.device_name;
286
287        self.msix_release_all()?;
288        self.irq_vec
289            .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>);
290        for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() {
291            if let Some(gsi_num) = gsi {
292                self.msix_restore_one(vector, *gsi_num)?;
293            } else {
294                info!(
295                    "skipping restore of vector {} for device {}",
296                    vector, self.device_name
297                );
298            }
299        }
300        Ok(())
301    }
302
303    /// Restore the specified MSI-X vector.
304    ///
305    /// Note: we skip the checks from [MsixConfig::msix_enable_one] because for
306    /// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must
307    /// have passed those checks.
308    fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> {
309        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
310        let request = VmIrqRequest::AllocateOneMsiAtGsi {
311            irqfd,
312            gsi,
313            device_id: self.pci_id,
314            queue_id: index,
315            device_name: self.device_name.clone(),
316        };
317        self.msi_device_socket
318            .send(&request)
319            .map_err(MsixError::AllocateOneMsiSend)?;
320        if let VmIrqResponse::Err(e) = self
321            .msi_device_socket
322            .recv()
323            .map_err(MsixError::AllocateOneMsiRecv)?
324        {
325            return Err(MsixError::AllocateOneMsi(e));
326        };
327
328        self.irq_vec[index] = Some(IrqfdGsi {
329            irqfd: match request {
330                VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd,
331                _ => unreachable!(),
332            },
333            gsi,
334        });
335        self.add_msi_route(index as u16, gsi)?;
336        Ok(())
337    }
338
339    /// On warm restore, there could already be MSIs registered. We need to
340    /// release them in case the routing has changed (e.g. different
341    /// data <-> GSI).
342    fn msix_release_all(&mut self) -> MsixResult<()> {
343        for irqfd_gsi in self.irq_vec.drain(..).flatten() {
344            let request = VmIrqRequest::ReleaseOneIrq {
345                gsi: irqfd_gsi.gsi,
346                irqfd: irqfd_gsi.irqfd,
347            };
348
349            self.msi_device_socket
350                .send(&request)
351                .map_err(MsixError::ReleaseOneIrqSend)?;
352            if let VmIrqResponse::Err(e) = self
353                .msi_device_socket
354                .recv()
355                .map_err(MsixError::ReleaseOneIrqRecv)?
356            {
357                return Err(MsixError::ReleaseOneIrq(e));
358            }
359        }
360        Ok(())
361    }
362
363    fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> {
364        let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
365        self.read_msix_table((index * 16).into(), data.as_mut());
366        let msi_address: u64 = u64::from_le_bytes(data);
367        let mut data: [u8; 4] = [0, 0, 0, 0];
368        self.read_msix_table((index * 16 + 8).into(), data.as_mut());
369        let msi_data: u32 = u32::from_le_bytes(data);
370
371        if msi_address == 0 {
372            return Ok(());
373        }
374
375        // Only used on aarch64, but make sure it is initialized correctly on all archs for better
376        // test coverage.
377        #[allow(unused_variables)]
378        let pci_address = self
379            .pci_address
380            .expect("MsixConfig: must call set_pci_address before config writes");
381
382        self.msi_device_socket
383            .send(&VmIrqRequest::AddMsiRoute {
384                gsi,
385                msi_address,
386                msi_data,
387                #[cfg(target_arch = "aarch64")]
388                pci_address,
389            })
390            .map_err(MsixError::AddMsiRouteSend)?;
391        if let VmIrqResponse::Err(e) = self
392            .msi_device_socket
393            .recv()
394            .map_err(MsixError::AddMsiRouteRecv)?
395        {
396            return Err(MsixError::AddMsiRoute(e));
397        }
398        Ok(())
399    }
400
401    // Enable MSI-X
402    fn msix_enable_all(&mut self) -> MsixResult<()> {
403        for index in 0..self.irq_vec.len() {
404            self.msix_enable_one(index)?;
405        }
406        Ok(())
407    }
408
409    // Use a new MSI-X vector
410    // Create a new eventfd and bind them to a new msi
411    fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> {
412        if self.irq_vec[index].is_some()
413            || !self.enabled()
414            || self.masked()
415            || self.table_masked(index)
416        {
417            return Ok(());
418        }
419        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
420        let request = VmIrqRequest::AllocateOneMsi {
421            irqfd,
422            device_id: self.pci_id,
423            queue_id: index,
424            device_name: self.device_name.clone(),
425        };
426        self.msi_device_socket
427            .send(&request)
428            .map_err(MsixError::AllocateOneMsiSend)?;
429        let irq_num: u32 = match self
430            .msi_device_socket
431            .recv()
432            .map_err(MsixError::AllocateOneMsiRecv)?
433        {
434            VmIrqResponse::AllocateOneMsi { gsi } => gsi,
435            VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)),
436            _ => unreachable!(),
437        };
438        self.irq_vec[index] = Some(IrqfdGsi {
439            irqfd: match request {
440                VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd,
441                _ => unreachable!(),
442            },
443            gsi: irq_num,
444        });
445
446        self.add_msi_route(index as u16, irq_num)?;
447        Ok(())
448    }
449
450    /// Read MSI-X table
451    ///  # Arguments
452    ///  * 'offset' - the offset within the MSI-X Table
453    ///  * 'data' - used to store the read results
454    ///
455    /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full
456    /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined.
457    ///
458    ///   location: DWORD3            DWORD2      DWORD1            DWORD0
459    ///   entry 0:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
460    ///   entry 1:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
461    ///   entry 2:  Vector Control    Msg Data    Msg Upper Addr    Msg Addr
462    ///   ...
463    pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) {
464        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
465        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
466
467        if index >= self.table_entries.len() {
468            error!("invalid MSI-X table index {}", index);
469            return;
470        }
471
472        match data.len() {
473            4 => {
474                let value = match modulo_offset {
475                    0x0 => self.table_entries[index].msg_addr_lo,
476                    0x4 => self.table_entries[index].msg_addr_hi,
477                    0x8 => self.table_entries[index].msg_data,
478                    0xc => self.table_entries[index].vector_ctl,
479                    _ => {
480                        error!("invalid offset");
481                        0
482                    }
483                };
484
485                data.copy_from_slice(&value.to_le_bytes());
486            }
487            8 => {
488                let value = match modulo_offset {
489                    0x0 => {
490                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
491                            | u64::from(self.table_entries[index].msg_addr_lo)
492                    }
493                    0x8 => {
494                        (u64::from(self.table_entries[index].vector_ctl) << 32)
495                            | u64::from(self.table_entries[index].msg_data)
496                    }
497                    _ => {
498                        error!("invalid offset");
499                        0
500                    }
501                };
502
503                data.copy_from_slice(&value.to_le_bytes());
504            }
505            _ => error!("invalid data length"),
506        };
507    }
508
    /// Write to MSI-X table
    ///
    /// Message Address: the contents of this field specifies the address
    ///     for the memory write transaction; different MSI-X vectors have
    ///     different Message Address values
    /// Message Data: the contents of this field specifies the data driven
    ///     on AD\[31::00\] during the memory write transaction's data phase.
    /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit
    ///     is set, the function is prohibited from sending a message using
    ///     this MSI-X Table entry.
    ///
    /// Returns `MsixStatus::EntryChanged(index)` when the entry's mask state
    /// flipped, otherwise `MsixStatus::NothingToDo`.
    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            error!("invalid MSI-X table index {}", index);
            return MsixStatus::NothingToDo;
        }

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        // Apply the write: aligned DWORD updates one field, aligned QWORD
        // updates two adjacent fields.
        match data.len() {
            4 => {
                let value = u32::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => self.table_entries[index].vector_ctl = value,
                    _ => error!("invalid offset"),
                };
            }
            8 => {
                let value = u64::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };
            }
            _ => error!("invalid data length"),
        };

        let new_entry = self.table_entries[index].clone();

        // This MSI-X vector is enabled for the first time.
        if self.enabled()
            && !self.masked()
            && self.irq_vec[index].is_none()
            && old_entry.masked()
            && !new_entry.masked()
        {
            if let Err(e) = self.msix_enable_one(index) {
                error!("failed to enable MSI-X vector {}: {}", index, e);
                // Re-mask the entry so state reflects that no irqfd is bound.
                self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT;
            }
            return MsixStatus::EntryChanged(index);
        }

        // The guest reprogrammed address/data of an already-routed vector:
        // update the route on the irqchip.
        if self.enabled()
            && (old_entry.msg_addr_lo != new_entry.msg_addr_lo
                || old_entry.msg_addr_hi != new_entry.msg_addr_hi
                || old_entry.msg_data != new_entry.msg_data)
        {
            if let Some(irqfd_gsi) = &self.irq_vec[index] {
                let irq_num = irqfd_gsi.gsi;
                if let Err(e) = self.add_msi_route(index as u16, irq_num) {
                    error!("add_msi_route failed: {}", e);
                }
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked() {
            if old_entry.masked() && !self.table_entries[index].masked() {
                if self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
                return MsixStatus::EntryChanged(index);
            } else if !old_entry.masked() && self.table_entries[index].masked() {
                return MsixStatus::EntryChanged(index);
            }
        }
        MsixStatus::NothingToDo
    }
609
610    /// Read PBA Entries
611    ///  # Arguments
612    ///  * 'offset' - the offset within the PBA entries
613    ///  * 'data' - used to store the read results
614    ///
615    /// Pending Bits\[63::00\]: For each Pending Bit that is set, the function
616    /// has a pending message for the associated MSI-X Table entry.
617    pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) {
618        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
619        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
620
621        if index >= self.pba_entries.len() {
622            error!("invalid PBA index {}", index);
623            return;
624        }
625
626        match data.len() {
627            4 => {
628                let value: u32 = match modulo_offset {
629                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
630                    0x4 => (self.pba_entries[index] >> 32) as u32,
631                    _ => {
632                        error!("invalid offset");
633                        0
634                    }
635                };
636
637                data.copy_from_slice(&value.to_le_bytes());
638            }
639            8 => {
640                let value: u64 = match modulo_offset {
641                    0x0 => self.pba_entries[index],
642                    _ => {
643                        error!("invalid offset");
644                        0
645                    }
646                };
647
648                data.copy_from_slice(&value.to_le_bytes());
649            }
650            _ => error!("invalid data length"),
651        }
652    }
653
    /// Write to PBA Entries
    ///
    /// Software should never write, and should only read Pending Bits.
    /// If software writes to Pending Bits, the result is undefined.
    ///
    /// This implementation logs the attempt and discards the write.
    pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }
661
662    fn set_pba_bit(&mut self, vector: u16, set: bool) {
663        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
664
665        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
666        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
667        let mut mask: u64 = (1 << shift) as u64;
668
669        if set {
670            self.pba_entries[index] |= mask;
671        } else {
672            mask = !mask;
673            self.pba_entries[index] &= mask;
674        }
675    }
676
677    fn get_pba_bit(&self, vector: u16) -> u8 {
678        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
679
680        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
681        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
682
683        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
684    }
685
686    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
687        if let Some(irq) = &self.irq_vec[vector] {
688            irq.irqfd.signal().unwrap();
689        }
690
691        // Clear the bit from PBA
692        self.set_pba_bit(vector as u16, false);
693    }
694
695    /// Inject virtual interrupt to the guest
696    ///
697    ///  # Arguments
698    ///  * 'vector' - the index to the MSI-X Table entry
699    ///
700    /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is
701    /// prohibited from sending the associated message, and the function
702    /// must set the associated Pending bit whenever the function would
703    /// otherwise send the message. When software unmasks a vector whose
704    /// associated Pending bit is set, the function must schedule sending
705    /// the associated message, and clear the Pending bit as soon as the
706    /// message has been sent.
707    ///
708    /// If the vector is unmasked, writing to irqfd which wakes up KVM to
709    /// inject virtual interrupt to the guest.
710    pub fn trigger(&mut self, vector: u16) {
711        if self.table_entries[vector as usize].masked() || self.masked() {
712            self.set_pba_bit(vector, true);
713        } else if let Some(irq) = self.irq_vec.get(vector as usize).unwrap_or(&None) {
714            irq.irqfd.signal().unwrap();
715        }
716    }
717
    /// Return the raw descriptor of the MSI device socket (the same Tube used
    /// for allocate/release/route requests).
    pub fn get_msi_socket(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
722
723    /// Return irqfd of MSI-X Table entry
724    ///
725    ///  # Arguments
726    ///  * 'vector' - the index to the MSI-X table entry
727    pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
728        match self.irq_vec.get(vector).unwrap_or(&None) {
729            Some(irq) => Some(&irq.irqfd),
730            None => None,
731        }
732    }
733
734    pub fn destroy(&mut self) {
735        while let Some(irq) = self.irq_vec.pop() {
736            if let Some(irq) = irq {
737                let request = VmIrqRequest::ReleaseOneIrq {
738                    gsi: irq.gsi,
739                    irqfd: irq.irqfd,
740                };
741                if self.msi_device_socket.send(&request).is_err() {
742                    continue;
743                }
744                let _ = self.msi_device_socket.recv::<VmIrqResponse>();
745            }
746        }
747    }
748}
749
750const MSIX_CONFIG_READ_MASK: [u32; 3] = [0xc000_0000, 0, 0];
751
752impl PciCapConfig for MsixConfig {
753    fn read_mask(&self) -> &'static [u32] {
754        &MSIX_CONFIG_READ_MASK
755    }
756
757    fn read_reg(&self, reg_idx: usize) -> u32 {
758        if reg_idx == 0 {
759            self.read_msix_capability(0)
760        } else {
761            0
762        }
763    }
764
765    fn write_reg(
766        &mut self,
767        reg_idx: usize,
768        offset: u64,
769        data: &[u8],
770    ) -> Option<Box<dyn PciCapConfigWriteResult>> {
771        let status = if reg_idx == 0 {
772            self.write_msix_capability(offset, data)
773        } else {
774            MsixStatus::NothingToDo
775        };
776        Some(Box::new(status))
777    }
778}
779
// Forwards to the underlying MSI device socket.
impl AsRawDescriptor for MsixConfig {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
}
785
786/// Message Control Register
787//   10-0:  MSI-X Table size
788//   13-11: Reserved
789//   14:    Mask. Mask all MSI-X when set.
790//   15:    Enable. Enable all MSI-X when set.
791// See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
792#[bitfield]
793#[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
794pub struct MsixCtrl {
795    table_size: B10,
796    reserved: B4,
797    mask: B1,
798    enable: B1,
799}
800
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
/// MSI-X Capability Structure
///
/// Laid out exactly as it appears in PCI config space; `bytes()` serializes
/// it byte-for-byte via `IntoBytes`.
pub struct MsixCap {
    // To make add_capability() happy. Kept zero here; presumably filled in
    // when the capability is added to config space — TODO confirm against
    // the add_capability() implementation.
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}
820
821impl PciCapability for MsixCap {
822    fn bytes(&self) -> &[u8] {
823        self.as_bytes()
824    }
825
826    fn id(&self) -> PciCapabilityID {
827        PciCapabilityID::Msix
828    }
829
830    fn writable_bits(&self) -> Vec<u32> {
831        // Only msg_ctl[15:14] is writable
832        vec![0x3000_0000, 0, 0]
833    }
834}
835
836impl MsixCap {
837    pub fn new(
838        table_pci_bar: u8,
839        table_size: u16,
840        table_off: u32,
841        pba_pci_bar: u8,
842        pba_off: u32,
843    ) -> Self {
844        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
845
846        // Set the table size and enable MSI-X.
847        let mut msg_ctl = MsixCtrl::new();
848        msg_ctl.set_enable(1);
849        // Table Size is N - 1 encoded.
850        msg_ctl.set_table_size(table_size - 1);
851
852        MsixCap {
853            _cap_vndr: 0,
854            _cap_next: 0,
855            msg_ctl,
856            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
857            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
858        }
859    }
860}
861
862#[cfg(test)]
863mod tests {
864
865    use std::thread;
866
867    use super::*;
868
869    #[track_caller]
870    fn recv_allocate_msi(t: &Tube) -> u32 {
871        match t.recv::<VmIrqRequest>().unwrap() {
872            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
873            msg => panic!("unexpected irqchip message: {msg:?}"),
874        }
875    }
876
    // Plain-data copy of the fields carried by `VmIrqRequest::AddMsiRoute`,
    // captured by `recv_add_msi_route` so tests can compare whole routes with
    // a single `assert_eq!`.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    struct MsiRouteDetails {
        // GSI this route is attached to.
        gsi: u32,
        // Guest MSI address (tests here only use values with the high half 0).
        msi_address: u64,
        // Guest MSI data payload.
        msi_data: u32,
        // On aarch64, MSI routes also carry the source device's PCI address.
        #[cfg(target_arch = "aarch64")]
        pci_address: resources::PciAddress,
    }
885
    // Arbitrary PCI address (bus 1, device 2, function 3) assigned to the
    // MsixConfig under test; on aarch64 it must reappear in the MSI routes
    // observed by the fake irqchip after restore.
    const TEST_PCI_ADDRESS: resources::PciAddress = resources::PciAddress {
        bus: 1,
        dev: 2,
        func: 3,
    };
891
892    #[track_caller]
893    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
894        match t.recv::<VmIrqRequest>().unwrap() {
895            VmIrqRequest::AddMsiRoute {
896                gsi,
897                msi_address,
898                msi_data,
899                #[cfg(target_arch = "aarch64")]
900                pci_address,
901            } => MsiRouteDetails {
902                gsi,
903                msi_address,
904                msi_data,
905                #[cfg(target_arch = "aarch64")]
906                pci_address,
907            },
908            msg => panic!("unexpected irqchip message: {msg:?}"),
909        }
910    }
911
912    #[track_caller]
913    fn recv_release_one_irq(t: &Tube) -> u32 {
914        match t.recv::<VmIrqRequest>().unwrap() {
915            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
916            msg => panic!("unexpected irqchip message: {msg:?}"),
917        }
918    }
919
920    #[track_caller]
921    fn send_ok(t: &Tube) {
922        t.send(&VmIrqResponse::Ok).unwrap();
923    }
924
925    /// Tests a cold restore where there are no existing vectors at the time
926    /// restore is called.
927    #[test]
928    fn verify_msix_restore_cold_smoke() {
929        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
930        let (_unused, unused_config_tube) = Tube::pair().unwrap();
931
932        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());
933        cfg.set_pci_address(TEST_PCI_ADDRESS);
934
935        // Set up two MSI-X vectors (0 and 1).
936        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
937        cfg.table_entries[0].msg_data = 0xd0;
938        cfg.table_entries[0].msg_addr_lo = 0xa0;
939        cfg.table_entries[0].msg_addr_hi = 0;
940        cfg.table_entries[1].msg_data = 0xd1;
941        cfg.table_entries[1].msg_addr_lo = 0xa1;
942        cfg.table_entries[1].msg_addr_hi = 0;
943
944        // Pretend that these vectors were hooked up to GSIs 10 & 20,
945        // respectively.
946        cfg.irq_vec = vec![
947            Some(IrqfdGsi {
948                gsi: 10,
949                irqfd: Event::new().unwrap(),
950            }),
951            Some(IrqfdGsi {
952                gsi: 20,
953                irqfd: Event::new().unwrap(),
954            }),
955        ];
956
957        // Take a snapshot of MsixConfig.
958        let snapshot = cfg.snapshot().unwrap();
959
960        // Create a fake irqchip to respond to our requests
961        let irqchip_fake = thread::spawn(move || {
962            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
963            send_ok(&irqchip_tube);
964            assert_eq!(
965                recv_add_msi_route(&irqchip_tube),
966                MsiRouteDetails {
967                    gsi: 10,
968                    msi_address: 0xa0,
969                    msi_data: 0xd0,
970                    #[cfg(target_arch = "aarch64")]
971                    pci_address: TEST_PCI_ADDRESS,
972                }
973            );
974            send_ok(&irqchip_tube);
975
976            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
977            send_ok(&irqchip_tube);
978            assert_eq!(
979                recv_add_msi_route(&irqchip_tube),
980                MsiRouteDetails {
981                    gsi: 20,
982                    msi_address: 0xa1,
983                    msi_data: 0xd1,
984                    #[cfg(target_arch = "aarch64")]
985                    pci_address: TEST_PCI_ADDRESS,
986                }
987            );
988            send_ok(&irqchip_tube);
989            irqchip_tube
990        });
991
992        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
993        restored_cfg.restore(snapshot).unwrap();
994        irqchip_fake.join().unwrap();
995
996        assert_eq!(restored_cfg.pci_id, 0);
997        assert_eq!(restored_cfg.device_name, "test_device");
998    }
999
    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        // Unlike the cold-restore test, the config that is snapshotted is the
        // same one restored into, so its live vectors must be torn down first.
        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());
        cfg.set_pci_address(TEST_PCI_ADDRESS);

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(
                recv_add_msi_route(&irqchip_tube),
                MsiRouteDetails {
                    gsi: 10,
                    msi_address: 0xa0,
                    msi_data: 0xd0,
                    #[cfg(target_arch = "aarch64")]
                    pci_address: TEST_PCI_ADDRESS,
                }
            );
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            assert_eq!(
                recv_add_msi_route(&irqchip_tube),
                MsiRouteDetails {
                    gsi: 20,
                    msi_address: 0xa1,
                    msi_data: 0xd1,
                    #[cfg(target_arch = "aarch64")]
                    pci_address: TEST_PCI_ADDRESS,
                }
            );
            send_ok(&irqchip_tube);
            // Return the tube to the joining thread so it is not dropped
            // before the restore below has finished using the other end.
            irqchip_tube
        });

        // Restoring in place must release GSIs 10 and 20 before re-allocating
        // them — exactly the sequence the fake irqchip above asserts.
        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        // Identity fields survive the round trip.
        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }
1079}