devices/virtio/video/decoder/backend/mod.rs

// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! This module implements the interface that actual decoder devices need to
//! implement in order to provide video decoding capability to the guest.

use base::AsRawDescriptor;

use crate::virtio::video::decoder::Capability;
use crate::virtio::video::error::VideoError;
use crate::virtio::video::error::VideoResult;
use crate::virtio::video::format::Format;
use crate::virtio::video::format::Rect;
use crate::virtio::video::resource::GuestResource;
use crate::virtio::video::resource::GuestResourceHandle;

#[cfg(feature = "ffmpeg")]
pub mod ffmpeg;

#[cfg(feature = "vaapi")]
pub mod vaapi;
#[cfg(feature = "libvda")]
pub mod vda;

/// Contains the device's state for one playback session, i.e. one stream.
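/// A typical session lifecycle, as a minimal sketch (error handling elided; `session`,
/// `input_handle` and the other bindings are placeholders):
///
/// ```ignore
/// // Queue one input buffer for decoding.
/// session.decode(resource_id, timestamp, input_handle, 0, len)?;
/// loop {
///     match session.read_event()? {
///         // Usually triggered by the first input; provide output buffers in response.
///         DecoderEvent::ProvidePictureBuffers { min_num_buffers, .. } => {
///             session.set_output_parameters(min_num_buffers as usize, Format::NV12)?;
///             // ...then call `use_output_buffer()` for each output buffer...
///         }
///         // A frame has been decoded; recycle its buffer once we are done with it.
///         DecoderEvent::PictureReady { picture_buffer_id, .. } => {
///             session.reuse_output_buffer(picture_buffer_id)?;
///         }
///         // Our input buffer can be reused or dropped.
///         DecoderEvent::NotifyEndOfBitstreamBuffer(_) => break,
///         _ => (),
///     }
/// }
/// ```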
pub trait DecoderSession {
    /// Tell how many output buffers will be used for this session and which format they will carry.
    /// This method must be called after a `ProvidePictureBuffers` event is emitted, and before the
    /// first call to `use_output_buffer()`.
    fn set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()>;

    /// Decode the compressed stream contained in [`offset`..`offset`+`bytes_used`] of the shared
    /// memory in the input `resource`.
    ///
    /// `resource_id` is the ID of the input resource. It will be signaled in a
    /// `NotifyEndOfBitstreamBuffer` event once the input resource is not used anymore.
    ///
    /// `timestamp` is a timestamp that will be copied into the frames decoded from that input
    /// stream. Units are effectively free and provided by the input stream.
    ///
    /// The device takes ownership of `resource` and is responsible for closing it once it is not
    /// used anymore.
    ///
    /// The device will emit a `NotifyEndOfBitstreamBuffer` event with the `resource_id` value after
    /// the input buffer has been entirely processed.
    ///
    /// The device will emit a `PictureReady` event with the `timestamp` value for each picture
    /// produced from that input buffer.
    fn decode(
        &mut self,
        resource_id: u32,
        timestamp: u64,
        resource: GuestResourceHandle,
        offset: u32,
        bytes_used: u32,
    ) -> VideoResult<()>;

    /// Flush the decoder device, i.e. finish processing all queued decode requests and emit frames
    /// for them.
    ///
    /// The device will emit a `FlushCompleted` event once the flush is done.
    fn flush(&mut self) -> VideoResult<()>;

    /// Reset the decoder device, i.e. cancel all pending decoding requests.
    ///
    /// The device will emit a `ResetCompleted` event once the reset is done.
    fn reset(&mut self) -> VideoResult<()>;

    /// Immediately release all buffers passed using `use_output_buffer()` and
    /// `reuse_output_buffer()`.
    fn clear_output_buffers(&mut self) -> VideoResult<()>;

    /// Returns the event pipe on which the availability of events will be signaled. Note that the
    /// returned value is borrowed and only valid as long as the session is alive.
    fn event_pipe(&self) -> &dyn AsRawDescriptor;

    /// Ask the device to use `resource` to store decoded frames according to its layout.
    /// `picture_buffer_id` is the ID of the picture that will be reproduced in `PictureReady`
    /// events using this buffer.
    ///
    /// The device takes ownership of `resource` and is responsible for closing it once the buffer
    /// is not used anymore (either when the session is closed, or a new set of buffers is provided
    /// for the session).
    ///
    /// The device will emit a `PictureReady` event with the `picture_buffer_id` field set to the
    /// same value as the argument of the same name when a frame has been decoded into that buffer.
    fn use_output_buffer(
        &mut self,
        picture_buffer_id: i32,
        resource: GuestResource,
    ) -> VideoResult<()>;

    /// Ask the device to reuse an output buffer previously passed to
    /// `use_output_buffer` and that has previously been returned to the decoder
    /// in a `PictureReady` event.
    ///
    /// The device will emit a `PictureReady` event with the `picture_buffer_id`
    /// field set to the same value as the argument of the same name when a
    /// frame has been decoded into that buffer.
    fn reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()>;

    /// Blocking call to read a single event from the event pipe.
    fn read_event(&mut self) -> VideoResult<DecoderEvent>;
}

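/// Forwarding implementation, so that anything that can yield references to a `DecoderSession`
/// trait object (e.g. `Box<dyn DecoderSession>`) can itself be used as a session.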
impl<S: AsMut<dyn DecoderSession> + AsRef<dyn DecoderSession> + ?Sized> DecoderSession for S {
    fn set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()> {
        self.as_mut().set_output_parameters(buffer_count, format)
    }

    fn decode(
        &mut self,
        resource_id: u32,
        timestamp: u64,
        resource: GuestResourceHandle,
        offset: u32,
        bytes_used: u32,
    ) -> VideoResult<()> {
        self.as_mut()
            .decode(resource_id, timestamp, resource, offset, bytes_used)
    }

    fn flush(&mut self) -> VideoResult<()> {
        self.as_mut().flush()
    }

    fn reset(&mut self) -> VideoResult<()> {
        self.as_mut().reset()
    }

    fn clear_output_buffers(&mut self) -> VideoResult<()> {
        self.as_mut().clear_output_buffers()
    }

    fn event_pipe(&self) -> &dyn AsRawDescriptor {
        self.as_ref().event_pipe()
    }

    fn use_output_buffer(
        &mut self,
        picture_buffer_id: i32,
        resource: GuestResource,
    ) -> VideoResult<()> {
        self.as_mut().use_output_buffer(picture_buffer_id, resource)
    }

    fn reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()> {
        self.as_mut().reuse_output_buffer(picture_buffer_id)
    }

    fn read_event(&mut self) -> VideoResult<DecoderEvent> {
        self.as_mut().read_event()
    }
}

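/// Interface for decoder backends, allowing the decoder device to query their capabilities and to
/// open new decoding sessions.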
pub trait DecoderBackend: Send {
    type Session: DecoderSession;

    /// Return the decoding capabilities for this backend instance.
    fn get_capabilities(&self) -> Capability;

    /// Create a new decoding session for the passed `format`.
    fn new_session(&mut self, format: Format) -> VideoResult<Self::Session>;

    /// Turn this backend into a trait object, allowing the same decoder to operate on a set of
    /// different backends.
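    ///
    /// A minimal usage sketch (`VaapiBackend` and `FfmpegBackend` stand in for hypothetical
    /// types implementing this trait):
    ///
    /// ```ignore
    /// let backend: Box<dyn DecoderBackend<Session = Box<dyn DecoderSession>>> = if use_vaapi {
    ///     VaapiBackend::new()?.into_trait_object()
    /// } else {
    ///     FfmpegBackend::new()?.into_trait_object()
    /// };
    /// ```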
    fn into_trait_object(self) -> Box<dyn DecoderBackend<Session = Box<dyn DecoderSession>>>
    where
        Self: Sized + 'static,
    {
        Box::new(GenericDecoderBackend(self)) as Box<dyn DecoderBackend<Session = _>>
    }
}

/// Type that changes the `Session` associated type to `Box<dyn DecoderSession>`, allowing us to
/// use trait objects for backends.
struct GenericDecoderBackend<S: DecoderBackend>(pub S);

impl<S> DecoderBackend for GenericDecoderBackend<S>
where
    S: DecoderBackend,
    <S as DecoderBackend>::Session: 'static,
{
    type Session = Box<dyn DecoderSession>;

    fn get_capabilities(&self) -> Capability {
        self.0.get_capabilities()
    }

    fn new_session(&mut self, format: Format) -> VideoResult<Self::Session> {
        self.0
            .new_session(format)
            .map(|s| Box::new(s) as Box<dyn DecoderSession>)
    }
}

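/// Forwarding implementation, so that boxed backends (including trait objects) can be used as
/// backends themselves.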
impl<S> DecoderBackend for Box<S>
where
    S: ?Sized,
    S: DecoderBackend,
{
    type Session = S::Session;

    fn get_capabilities(&self) -> Capability {
        self.as_ref().get_capabilities()
    }

    fn new_session(&mut self, format: Format) -> VideoResult<Self::Session> {
        self.as_mut().new_session(format)
    }
}

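/// Event that a decoder session can report through its event pipe and `read_event()`.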
#[derive(Debug)]
pub enum DecoderEvent {
    /// Emitted when the device knows the buffer format it will need to decode frames, and how many
    /// buffers it will need. The decoder is supposed to call `set_output_parameters()` to confirm
    /// the pixel format and actual number of buffers used, and provide buffers of the requested
    /// dimensions using `use_output_buffer()`.
    ProvidePictureBuffers {
        min_num_buffers: u32,
        width: i32,
        height: i32,
        visible_rect: Rect,
    },
    /// Emitted when the decoder is done decoding a picture. `picture_buffer_id`
    /// corresponds to the argument of the same name passed to `use_output_buffer()`
    /// or `reuse_output_buffer()`. `timestamp` corresponds to the argument of the
    /// same name passed to `decode()` and can be used to match decoded frames to
    /// the input buffer they were produced from.
    PictureReady {
        picture_buffer_id: i32,
        timestamp: u64,
    },
    /// Emitted when an input buffer passed to `decode()` is not used by the
    /// device anymore and can be reused by the decoder. The parameter corresponds
    /// to the `resource_id` argument passed to `decode()`.
    NotifyEndOfBitstreamBuffer(u32),
    /// Emitted when a decoding error has occurred.
    NotifyError(VideoError),
    /// Emitted after `flush()` has been called to signal that the flush is completed.
    FlushCompleted(VideoResult<()>),
    /// Emitted after `reset()` has been called to signal that the reset is completed.
    ResetCompleted(VideoResult<()>),
}

#[cfg(test)]
/// Shared functions that can be used to test individual backends.
mod tests {
    use std::time::Duration;

    use base::MappedRegion;
    use base::MemoryMappingBuilder;
    use base::SharedMemory;
    use base::WaitContext;

    use super::*;
    use crate::virtio::video::format::FramePlane;
    use crate::virtio::video::resource::GuestMemArea;
    use crate::virtio::video::resource::GuestMemHandle;
    use crate::virtio::video::resource::VirtioObjectHandle;

    // Test video stream and its properties.
    const H264_STREAM: &[u8] = include_bytes!("test-25fps.h264");
    const H264_STREAM_WIDTH: i32 = 320;
    const H264_STREAM_HEIGHT: i32 = 240;
    const H264_STREAM_NUM_FRAMES: usize = 250;
    const H264_STREAM_CRCS: &str = include_str!("test-25fps.crc");

    /// Splits an H.264 Annex B stream into chunks that are each guaranteed to contain a full
    /// frame worth of data.
    ///
    /// This is a pretty naive implementation that is only guaranteed to work with our test stream.
    /// We are not using `AVCodecParser` because it seems to modify the decoding context, which
    /// would result in testing conditions that diverge more from our real use case where parsing
    /// has already been done.
    struct H264NalIterator<'a> {
        stream: &'a [u8],
        pos: usize,
    }

    impl<'a> H264NalIterator<'a> {
        fn new(stream: &'a [u8]) -> Self {
            Self { stream, pos: 0 }
        }

        /// Returns the position of the next NAL unit start code in the stream, i.e. the next
        /// potential frame boundary.
        fn next_frame_pos(&self) -> Option<usize> {
            const H264_START_CODE: [u8; 4] = [0x0, 0x0, 0x0, 0x1];
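            // Search one byte past the current position so the start code at `self.pos` itself
            // is not matched again.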
            self.stream[self.pos + 1..]
                .windows(H264_START_CODE.len())
                .position(|window| window == H264_START_CODE)
                .map(|pos| self.pos + pos + 1)
        }

        /// Returns whether `slice` contains frame data, i.e. a header where the NAL unit type is
        /// 0x1 or 0x5.
        fn contains_frame(slice: &[u8]) -> bool {
            slice[4..].windows(4).any(|window| {
                window[0..3] == [0x0, 0x0, 0x1]
                    && (window[3] & 0x1f == 0x5 || window[3] & 0x1f == 0x1)
            })
        }
    }

    impl<'a> Iterator for H264NalIterator<'a> {
        type Item = &'a [u8];

        fn next(&mut self) -> Option<Self::Item> {
            match self.pos {
                cur_pos if cur_pos == self.stream.len() => None,
                cur_pos => loop {
                    self.pos = self.next_frame_pos().unwrap_or(self.stream.len());
                    let slice = &self.stream[cur_pos..self.pos];

                    // Keep advancing as long as we don't have frame data in our slice.
                    if Self::contains_frame(slice) || self.pos == self.stream.len() {
                        return Some(slice);
                    }
                },
            }
        }
    }

    // Build a virtio object handle from a linear memory area. This is useful to emulate the
    // scenario where we are decoding from or into virtio objects.
    #[allow(dead_code)]
    pub fn build_object_handle(mem: &SharedMemory) -> GuestResourceHandle {
        GuestResourceHandle::VirtioObject(VirtioObjectHandle {
            desc: base::clone_descriptor(mem).unwrap(),
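            // A modifier of 0 corresponds to DRM_FORMAT_MOD_LINEAR, i.e. a linear buffer layout.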
            modifier: 0,
        })
    }

    // Build a guest memory handle from a linear memory area. This is useful to emulate the
    // scenario where we are decoding from or into guest memory.
    #[allow(dead_code)]
    pub fn build_guest_mem_handle(mem: &SharedMemory) -> GuestResourceHandle {
        GuestResourceHandle::GuestPages(GuestMemHandle {
            desc: base::clone_descriptor(mem).unwrap(),
            mem_areas: vec![GuestMemArea {
                offset: 0,
                length: mem.size() as usize,
            }],
        })
    }

    /// Full decoding test of an H.264 video, checking that the expected flow of events occurs.
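    ///
    /// `input_resource_builder` and `output_resource_builder` create the resource handles for the
    /// input and output buffers (e.g. `build_guest_mem_handle` or `build_object_handle` above),
    /// which lets the same test exercise different resource types.
    ///
    /// A backend's test would typically call it along these lines (`MyBackend` being a
    /// hypothetical `DecoderBackend` implementation):
    ///
    /// ```ignore
    /// let mut backend = MyBackend::new().unwrap();
    /// decode_h264_generic(&mut backend, build_guest_mem_handle, build_guest_mem_handle);
    /// ```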
    pub fn decode_h264_generic<D, I, O>(
        decoder: &mut D,
        input_resource_builder: I,
        output_resource_builder: O,
    ) where
        D: DecoderBackend,
        I: Fn(&SharedMemory) -> GuestResourceHandle,
        O: Fn(&SharedMemory) -> GuestResourceHandle,
    {
        const NUM_OUTPUT_BUFFERS: usize = 4;
        const INPUT_BUF_SIZE: usize = 0x4000;
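        // NV12 frames consist of a full-resolution Y plane followed by a half-height interleaved
        // UV plane, so one frame occupies width * height * 3 / 2 bytes.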
        const OUTPUT_BUFFER_SIZE: usize =
            (H264_STREAM_WIDTH * (H264_STREAM_HEIGHT + H264_STREAM_HEIGHT / 2)) as usize;
        let mut session = decoder
            .new_session(Format::H264)
            .expect("failed to create H264 decoding session.");
        let wait_ctx = WaitContext::new().expect("Failed to create wait context");
        wait_ctx
            .add(session.event_pipe(), 0u8)
            .expect("Failed to add event pipe to wait context");
        // Output buffers suitable for receiving NV12 frames for our stream.
        let output_buffers = (0..NUM_OUTPUT_BUFFERS)
            .map(|i| {
                SharedMemory::new(
                    format!("video-output-buffer-{i}"),
                    OUTPUT_BUFFER_SIZE as u64,
                )
                .unwrap()
            })
            .collect::<Vec<_>>();
        let input_shm = SharedMemory::new("video-input-buffer", INPUT_BUF_SIZE as u64).unwrap();
        let input_mapping = MemoryMappingBuilder::new(input_shm.size() as usize)
            .from_shared_memory(&input_shm)
            .build()
            .unwrap();

        let mut decoded_frames_count = 0usize;
        let mut expected_frames_crcs = H264_STREAM_CRCS.lines();

        let mut on_frame_decoded = |session: &mut D::Session, picture_buffer_id: i32| {
            // Verify that the CRC of the decoded frame matches the expected one.
            let mapping = MemoryMappingBuilder::new(OUTPUT_BUFFER_SIZE)
                .from_shared_memory(&output_buffers[picture_buffer_id as usize])
                .build()
                .unwrap();
            let mut frame_data = vec![0u8; mapping.size()];
            assert_eq!(
                mapping.read_slice(&mut frame_data, 0).unwrap(),
                mapping.size()
            );

            let mut hasher = crc32fast::Hasher::new();
            hasher.update(&frame_data);
            let frame_crc = hasher.finalize();
            assert_eq!(
                format!("{frame_crc:08x}"),
                expected_frames_crcs
                    .next()
                    .expect("No CRC for decoded frame")
            );

            // We can recycle the frame now.
            session.reuse_output_buffer(picture_buffer_id).unwrap();
            decoded_frames_count += 1;
        };

        // Simple value by which we will multiply the frame number to obtain a fake timestamp.
        const TIMESTAMP_FOR_INPUT_ID_FACTOR: u64 = 1_000_000;
        for (input_id, slice) in H264NalIterator::new(H264_STREAM).enumerate() {
            let buffer_handle = input_resource_builder(&input_shm);
            input_mapping
                .write_slice(slice, 0)
                .expect("Failed to write stream data into input buffer.");
            session
                .decode(
                    input_id as u32,
                    input_id as u64 * TIMESTAMP_FOR_INPUT_ID_FACTOR,
                    buffer_handle,
                    0,
                    slice.len() as u32,
                )
                .expect("Call to decode() failed.");

            // Get all the events resulting from this submission.
            let mut events = Vec::new();
            while !wait_ctx.wait_timeout(Duration::ZERO).unwrap().is_empty() {
                events.push(session.read_event().unwrap());
            }

            // Our bitstream buffer should have been returned.
            let event_idx = events
                .iter()
                .position(|event| {
                    let input_id = input_id as u32;
                    matches!(event, DecoderEvent::NotifyEndOfBitstreamBuffer(index) if *index == input_id)
                })
                .unwrap();
            events.remove(event_idx);

            // After sending the first buffer we should get the initial resolution change event and
            // can provide the frames to decode into.
            if input_id == 0 {
                let event_idx = events
                    .iter()
                    .position(|event| {
                        matches!(
                            event,
                            DecoderEvent::ProvidePictureBuffers {
                                width: H264_STREAM_WIDTH,
                                height: H264_STREAM_HEIGHT,
                                visible_rect: Rect {
                                    left: 0,
                                    top: 0,
                                    right: H264_STREAM_WIDTH,
                                    bottom: H264_STREAM_HEIGHT,
                                },
                                ..
                            }
                        )
                    })
                    .unwrap();
                events.remove(event_idx);

                let out_format = Format::NV12;

                session
                    .set_output_parameters(NUM_OUTPUT_BUFFERS, out_format)
                    .unwrap();

                // Pass the buffers we will decode into.
                for (picture_buffer_id, buffer) in output_buffers.iter().enumerate() {
                    session
                        .use_output_buffer(
                            picture_buffer_id as i32,
                            GuestResource {
                                handle: output_resource_builder(buffer),
                                planes: vec![
                                    FramePlane {
                                        offset: 0,
                                        stride: H264_STREAM_WIDTH as usize,
                                        size: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                    },
                                    FramePlane {
                                        offset: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                        stride: H264_STREAM_WIDTH as usize,
                                        size: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                    },
                                ],
                                width: H264_STREAM_WIDTH as _,
                                height: H264_STREAM_HEIGHT as _,
                                format: out_format,
                                guest_cpu_mappable: false,
                            },
                        )
                        .unwrap();
                }
            }

            // If we have remaining events, they must be decoded frames. Get them and recycle them.
            for event in events {
                match event {
                    DecoderEvent::PictureReady {
                        picture_buffer_id, ..
                    } => on_frame_decoded(&mut session, picture_buffer_id),
                    e => panic!("Unexpected event: {e:?}"),
                }
            }
        }

        session.flush().unwrap();

        // Keep getting frames until the final event, which should be `FlushCompleted`.
        let mut received_flush_completed = false;
        while !wait_ctx.wait_timeout(Duration::ZERO).unwrap().is_empty() {
            match session.read_event().unwrap() {
                DecoderEvent::PictureReady {
                    picture_buffer_id, ..
                } => on_frame_decoded(&mut session, picture_buffer_id),
                DecoderEvent::FlushCompleted(Ok(())) => {
                    received_flush_completed = true;
                    break;
                }
                e => panic!("Unexpected event: {e:?}"),
            }
        }

        // Confirm that we got the FlushCompleted event.
        assert!(received_flush_completed);

        // We should have read all the events for that session.
        assert_eq!(wait_ctx.wait_timeout(Duration::ZERO).unwrap().len(), 0);

        // We should not be expecting any more frames.
        assert_eq!(expected_frames_crcs.next(), None);

        // Check that we decoded the expected number of frames.
        assert_eq!(decoded_frames_count, H264_STREAM_NUM_FRAMES);
    }
}