devices/virtio/queue/packed_queue.rs

// Copyright 2023 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#![deny(missing_docs)]

use std::mem::size_of;
use std::num::Wrapping;
use std::sync::atomic::fence;
use std::sync::atomic::AtomicU16;
use std::sync::atomic::Ordering;

use anyhow::bail;
use anyhow::Result;
use base::error;
use base::warn;
use base::Event;
use serde::Deserialize;
use serde::Serialize;
use snapshot::AnySnapshot;
use virtio_sys::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;

use crate::virtio::descriptor_chain::DescriptorChain;
use crate::virtio::descriptor_chain::VIRTQ_DESC_F_AVAIL;
use crate::virtio::descriptor_chain::VIRTQ_DESC_F_USED;
use crate::virtio::descriptor_chain::VIRTQ_DESC_F_WRITE;
use crate::virtio::queue::packed_descriptor_chain::PackedDesc;
use crate::virtio::queue::packed_descriptor_chain::PackedDescEvent;
use crate::virtio::queue::packed_descriptor_chain::PackedDescriptorChain;
use crate::virtio::queue::packed_descriptor_chain::PackedNotificationType;
use crate::virtio::queue::packed_descriptor_chain::RING_EVENT_FLAGS_DESC;
use crate::virtio::Interrupt;
use crate::virtio::QueueConfig;

#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
struct PackedQueueIndex {
    wrap_counter: bool,
    index: Wrapping<u16>,
}
impl PackedQueueIndex {
    pub fn new(wrap_counter: bool, index: u16) -> Self {
        Self {
            wrap_counter,
            index: Wrapping(index),
        }
    }

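    /// Decode a driver-supplied 16-bit `off_wrap` value: bit 15 holds the wrap counter and
    /// bits 0-14 hold the descriptor ring offset.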
    pub fn new_from_desc(desc: u16) -> Self {
        let wrap_counter: bool = (desc >> 15) == 1;
        let mask: u16 = 0x7fff;
        let index = desc & mask;
        Self::new(wrap_counter, index)
    }

    pub fn to_desc(self) -> PackedDescEvent {
        let flag = RING_EVENT_FLAGS_DESC;
        let mut desc = self.index.0;
        if self.wrap_counter {
            desc |= 1 << 15;
        }
        PackedDescEvent {
            desc: desc.into(),
            flag: flag.into(),
        }
    }

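    // Advance the index by `index_value` entries within a ring of `size` entries, flipping the
    // wrap counter whenever the index wraps past the end of the ring.
    //
    // A minimal sketch of the wrap behavior (illustrative only, assuming a queue size of 4):
    //
    //     let mut idx = PackedQueueIndex::default(); // index 0, wrap_counter = true
    //     idx.add_index(3, 4);                       // index 3, wrap_counter = true
    //     idx.add_index(2, 4);                       // index 1, wrap_counter = false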
    fn add_index(&mut self, index_value: u16, size: u16) {
        let new_index = self.index.0 + index_value;
        if new_index < size {
            self.index = Wrapping(new_index);
        } else {
            self.index = Wrapping(new_index - size);
            self.wrap_counter = !self.wrap_counter;
        }
    }
}

impl Default for PackedQueueIndex {
    fn default() -> Self {
        Self::new(true, 0)
    }
}

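/// A virtio queue in the packed virtqueue format introduced in VIRTIO 1.1, in which the driver
/// and the device share a single descriptor ring.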
#[derive(Debug)]
pub struct PackedQueue {
    mem: GuestMemory,

    event: Event,
    interrupt: Interrupt,

    // The queue size in elements the driver selected
    size: u16,

    // MSI-X vector for the queue. Don't care for INTx
    vector: u16,

    // Internal index counters to keep track of where to poll
    avail_index: PackedQueueIndex,
    use_index: PackedQueueIndex,
    signalled_used_index: PackedQueueIndex,

    // Device feature bits accepted by the driver
    features: u64,

    // Guest physical address of the descriptor table
    desc_table: GuestAddress,

    // Write-only by the device; includes information for reducing the number of device events
    device_event_suppression: GuestAddress,

    // Read-only by the device; includes information for reducing the number of driver events
    driver_event_suppression: GuestAddress,
}

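/// Serialized state of a `PackedQueue`.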
#[derive(Serialize, Deserialize)]
pub struct PackedQueueSnapshot {
    size: u16,
    vector: u16,
    avail_index: PackedQueueIndex,
    use_index: PackedQueueIndex,
    signalled_used_index: PackedQueueIndex,
    features: u64,
    desc_table: GuestAddress,
    device_event_suppression: GuestAddress,
    driver_event_suppression: GuestAddress,
}

impl PackedQueue {
    /// Constructs an empty virtio queue from the given `config`.
    pub fn new(
        config: &QueueConfig,
        mem: &GuestMemory,
        event: Event,
        interrupt: Interrupt,
    ) -> Result<Self> {
        let size = config.size();

        let desc_table = config.desc_table();
        let driver_area = config.avail_ring();
        let device_area = config.used_ring();

        // Validate addresses and queue size to ensure that address calculation won't overflow.
        let ring_sizes = Self::area_sizes(size, desc_table, driver_area, device_area);
        let rings = ring_sizes.iter().zip(vec![
            "descriptor table",
            "driver_event_suppression",
            "device_event_suppression",
        ]);

        for ((addr, size), name) in rings {
            if addr.checked_add(*size as u64).is_none() {
                bail!(
                    "virtio queue {} goes out of bounds: start:0x{:08x} size:0x{:08x}",
                    name,
                    addr.offset(),
                    size,
                );
            }
        }

        Ok(PackedQueue {
            mem: mem.clone(),
            event,
            interrupt,
            size,
            vector: config.vector(),
            desc_table: config.desc_table(),
            driver_event_suppression: config.avail_ring(),
            device_event_suppression: config.used_ring(),
            features: config.acked_features(),
            avail_index: PackedQueueIndex::default(),
            use_index: PackedQueueIndex::default(),
            signalled_used_index: PackedQueueIndex::default(),
        })
    }

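    /// Restore the queue's internal state after reclaiming it from a vhost-user backend.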
    pub fn vhost_user_reclaim(&mut self, _vring_base: u16) {
        // TODO: b/331466964 - Need more than `vring_base` to reclaim a packed virtqueue.
        unimplemented!()
    }

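    /// Get the index of the next descriptor ring entry that the device will process.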
    pub fn next_avail_to_process(&self) -> u16 {
        self.avail_index.index.0
    }

    /// Return the actual size of the queue, as the driver may not set up a
    /// queue as big as the device allows.
    pub fn size(&self) -> u16 {
        self.size
    }

    /// Getter for vector field
    pub fn vector(&self) -> u16 {
        self.vector
    }

    /// Getter for descriptor area
    pub fn desc_table(&self) -> GuestAddress {
        self.desc_table
    }

    /// Getter for driver area
    pub fn avail_ring(&self) -> GuestAddress {
        self.driver_event_suppression
    }

    /// Getter for device area
    pub fn used_ring(&self) -> GuestAddress {
        self.device_event_suppression
    }

    /// Get a reference to the queue's "kick event"
    pub fn event(&self) -> &Event {
        &self.event
    }

    /// Get a reference to the queue's interrupt
    pub fn interrupt(&self) -> &Interrupt {
        &self.interrupt
    }

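    // Returns the (start address, size in bytes) of each guest memory region used by the queue:
    // the descriptor ring (16 bytes per descriptor) and the two 4-byte event suppression
    // structures.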
    fn area_sizes(
        queue_size: u16,
        desc_table: GuestAddress,
        driver_area: GuestAddress,
        device_area: GuestAddress,
    ) -> Vec<(GuestAddress, usize)> {
        vec![
            (desc_table, 16 * queue_size as usize),
            (driver_area, 4),
            (device_area, 4),
        ]
    }

    /// Set the device event suppression
    ///
    /// This field is used to specify the timing of when the driver notifies the
    /// device that the descriptor table is ready to be processed.
    fn set_avail_event(&mut self, event: PackedDescEvent) {
        fence(Ordering::SeqCst);
        self.mem
            .write_obj_at_addr_volatile(event, self.device_event_suppression)
            .unwrap();
    }

    // Get the driver event suppression.
    // This field is used to specify the timing of when the device notifies the
    // driver that the descriptor table is ready to be processed.
    fn get_driver_event(&self) -> PackedDescEvent {
        fence(Ordering::SeqCst);

        let desc: PackedDescEvent = self
            .mem
            .read_obj_from_addr_volatile(self.driver_event_suppression)
            .unwrap();
        desc
    }

    /// Get the first available descriptor chain without removing it from the queue.
    /// Call `pop_peeked` to remove the returned descriptor chain from the queue.
    pub fn peek(&mut self) -> Option<DescriptorChain> {
        let desc_addr = self
            .desc_table
            .checked_add((self.avail_index.index.0 as u64) * 16)
            .expect("peeked address will not overflow");

        let desc = self
            .mem
            .read_obj_from_addr::<PackedDesc>(desc_addr)
            .inspect_err(|_e| {
                error!("failed to read desc {:#x}", desc_addr.offset());
            })
            .ok()?;

        if !desc.is_available(self.avail_index.wrap_counter as u16) {
            return None;
        }

        // This fence ensures that subsequent reads from the descriptor do not
        // get reordered and happen only after verifying the descriptor table is
        // available.
        fence(Ordering::SeqCst);

        let chain = PackedDescriptorChain::new(
            &self.mem,
            self.desc_table,
            self.size,
            self.avail_index.wrap_counter,
            self.avail_index.index.0,
        );

        match DescriptorChain::new(chain, &self.mem, self.avail_index.index.0) {
            Ok(descriptor_chain) => Some(descriptor_chain),
            Err(e) => {
                error!("{:#}", e);
                None
            }
        }
    }

    /// Remove the first available descriptor chain from the queue.
    /// This function should only be called immediately following `peek` and must be passed a
    /// reference to the same `DescriptorChain` returned by the most recent `peek`.
    pub(super) fn pop_peeked(&mut self, descriptor_chain: &DescriptorChain) {
        self.avail_index
            .add_index(descriptor_chain.count, self.size());
        if self.features & ((1u64) << VIRTIO_RING_F_EVENT_IDX) != 0 {
            self.set_avail_event(self.avail_index.to_desc());
        }
    }

    /// Puts multiple descriptor chains into the descriptor ring as used, along with the number
    /// of bytes written for each chain, for use by the guest.
    pub fn add_used_with_bytes_written_batch(
        &mut self,
        desc_chains: impl IntoIterator<Item = (DescriptorChain, u32)>,
    ) {
        // Get a `VolatileSlice` covering the descriptor table. This ensures that the raw pointers
        // generated below point into valid `GuestMemory` regions.
        let desc_table_size = size_of::<PackedDesc>() * usize::from(self.size);
        let desc_table_vslice = self
            .mem
            .get_slice_at_addr(self.desc_table, desc_table_size)
            .unwrap();

        let desc_table_ptr = desc_table_vslice.as_mut_ptr() as *mut PackedDesc;

        for (desc_chain, len) in desc_chains {
            debug_assert!(desc_chain.index() < self.size);

            let chain_id = desc_chain
                .id
                .expect("Packed descriptor chain should have id");

            let wrap_counter = self.use_index.wrap_counter;

            let mut flags: u16 = 0;
            if wrap_counter {
                flags |= VIRTQ_DESC_F_USED | VIRTQ_DESC_F_AVAIL;
            }
            if len > 0 {
                flags |= VIRTQ_DESC_F_WRITE;
            }

            // SAFETY: `use_index.index` is always less than the queue size, so the resulting
            // pointer stays within `desc_table_vslice`.
            let desc_ptr = unsafe { desc_table_ptr.add(usize::from(self.use_index.index.0)) };

            // SAFETY: `desc_ptr` is always a valid pointer
            unsafe {
                std::ptr::write_volatile(std::ptr::addr_of_mut!((*desc_ptr).len), len.into());
                std::ptr::write_volatile(std::ptr::addr_of_mut!((*desc_ptr).id), chain_id.into());
            }

            // Writing to flags must come last so that the driver never observes a partially
            // written descriptor.
            fence(Ordering::Release);

            // SAFETY: `desc_ptr` is always a valid pointer
            let desc_flags_atomic = unsafe {
                AtomicU16::from_ptr(std::ptr::addr_of_mut!((*desc_ptr).flags) as *mut u16)
            };
            desc_flags_atomic.store(u16::to_le(flags), Ordering::Relaxed);

            self.use_index.add_index(desc_chain.count, self.size());
        }
    }
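
    // A minimal device-side processing loop built from the functions above (illustrative sketch
    // only; `process` stands in for device-specific handling and returns the number of bytes
    // written back to the guest):
    //
    //     while let Some(chain) = queue.peek() {
    //         queue.pop_peeked(&chain);
    //         let len = process(&chain);
    //         queue.add_used_with_bytes_written_batch([(chain, len)]);
    //         queue.trigger_interrupt();
    //     }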

    /// Returns whether the queue should have an interrupt sent based on its state.
    fn queue_wants_interrupt(&mut self) -> bool {
        let driver_event = self.get_driver_event();
        match driver_event.notification_type() {
            PackedNotificationType::Enable => true,
            PackedNotificationType::Disable => false,
            PackedNotificationType::Desc(desc) => {
                if self.features & ((1u64) << VIRTIO_RING_F_EVENT_IDX) == 0 {
                    // The driver may only use descriptor-based notifications if
                    // VIRTIO_RING_F_EVENT_IDX was negotiated; ideally this would be reported as
                    // an error rather than just a warning.
                    warn!("This is undefined behavior. We should actually send error in this case");
                    return true;
                }

                // Remember the current use_index for the next notification.
                let old = self.signalled_used_index;
                self.signalled_used_index = self.use_index;

                // Get desc_event_off and desc_event_wrap from the driver event suppression area.
                let event_index: PackedQueueIndex = PackedQueueIndex::new_from_desc(desc);

                let event_idx = event_index.index;
                let old_idx = old.index;
                let new_idx = self.use_index.index;

                // In QEMU's implementation there is an additional adjustment; its correctness
                // still needs to be verified:
                // if event_index.wrap_counter != self.use_index.wrap_counter {
                //     event_idx -= self.size() as u16;
                // }

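                // Standard virtio event-index check (the packed-ring analogue of
                // vring_need_event): with wrapping arithmetic, the comparison below is true
                // exactly when `event_idx` lies in the range [old_idx, new_idx), i.e. the index
                // the driver asked to be notified at has been passed since the last signal.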
                (new_idx - event_idx - Wrapping(1)) < (new_idx - old_idx)
            }
        }
    }

    /// Inject an interrupt into the guest on this queue if needed.
    /// Returns true if an interrupt was injected into the guest for this queue,
    /// false otherwise.
    pub fn trigger_interrupt(&mut self) -> bool {
        if self.queue_wants_interrupt() {
            self.interrupt.signal_used_queue(self.vector);
            true
        } else {
            false
        }
    }

    /// Acknowledges that this set of features should be enabled on this queue.
    pub fn ack_features(&mut self, features: u64) {
        self.features |= features;
    }

    /// TODO: b/290307056 - Implement snapshot for packed virtqueue,
    /// add tests to validate.
    pub fn snapshot(&self) -> Result<AnySnapshot> {
        bail!("Snapshot for packed virtqueue not implemented.");
    }

    /// TODO: b/290307056 - Implement restore for packed virtqueue,
    /// add tests to validate.
    pub fn restore(
        _queue_value: AnySnapshot,
        _mem: &GuestMemory,
        _event: Event,
        _interrupt: Interrupt,
    ) -> Result<PackedQueue> {
        bail!("Restore for packed virtqueue not implemented.");
    }
}