crosvm/crosvm/sys/linux/
device_helpers.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::collections::BTreeMap;
6use std::collections::BTreeSet;
7use std::convert::TryFrom;
8use std::fs::File;
9use std::fs::OpenOptions;
10use std::io::ErrorKind;
11use std::ops::RangeInclusive;
12use std::os::unix::net::UnixStream;
13use std::path::Path;
14use std::path::PathBuf;
15use std::str;
16use std::sync::Arc;
17use std::time::Duration;
18use std::time::Instant;
19
20use anyhow::anyhow;
21use anyhow::bail;
22use anyhow::Context;
23use anyhow::Result;
24use arch::VirtioDeviceStub;
25use base::linux::MemfdSeals;
26use base::sys::SharedMemoryLinux;
27use base::ReadNotifier;
28use base::*;
29use devices::serial_device::SerialParameters;
30use devices::serial_device::SerialType;
31use devices::vfio::VfioContainerManager;
32use devices::virtio;
33use devices::virtio::block::DiskOption;
34#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
35use devices::virtio::device_constants::video::VideoBackendType;
36#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
37use devices::virtio::device_constants::video::VideoDeviceType;
38use devices::virtio::ipc_memory_mapper::create_ipc_mapper;
39use devices::virtio::ipc_memory_mapper::CreateIpcMapperRet;
40use devices::virtio::memory_mapper::BasicMemoryMapper;
41use devices::virtio::memory_mapper::MemoryMapperTrait;
42#[cfg(feature = "pvclock")]
43use devices::virtio::pvclock::PvClock;
44use devices::virtio::scsi::ScsiOption;
45#[cfg(feature = "audio")]
46use devices::virtio::snd::parameters::Parameters as SndParameters;
47use devices::virtio::vfio_wrapper::VfioWrapper;
48#[cfg(feature = "net")]
49use devices::virtio::vhost_user_backend::NetBackend;
50use devices::virtio::vhost_user_backend::VhostUserDeviceBuilder;
51use devices::virtio::vhost_user_backend::VhostUserVsockDevice;
52use devices::virtio::vsock::VsockConfig;
53use devices::virtio::Console;
54use devices::virtio::MemSlotConfig;
55#[cfg(feature = "net")]
56use devices::virtio::NetError;
57#[cfg(feature = "net")]
58use devices::virtio::NetParameters;
59#[cfg(feature = "net")]
60use devices::virtio::NetParametersMode;
61use devices::virtio::PmemConfig;
62use devices::virtio::VhostUserFrontend;
63use devices::virtio::VirtioDevice;
64use devices::virtio::VirtioDeviceType;
65use devices::BusDeviceObj;
66use devices::IommuDevType;
67use devices::PciAddress;
68use devices::PciDevice;
69use devices::VfioDevice;
70use devices::VfioDeviceType;
71use devices::VfioPciDevice;
72use devices::VfioPlatformDevice;
73#[cfg(feature = "vtpm")]
74use devices::VtpmProxy;
75use hypervisor::MemCacheType;
76use hypervisor::ProtectionType;
77use hypervisor::Vm;
78use jail::*;
79use minijail::Minijail;
80#[cfg(feature = "net")]
81use net_util::sys::linux::Tap;
82#[cfg(feature = "net")]
83use net_util::MacAddress;
84#[cfg(feature = "net")]
85use net_util::TapTCommon;
86use resources::Alloc;
87use resources::AllocOptions;
88use resources::SystemAllocator;
89use sync::Mutex;
90use vm_control::api::VmMemoryClient;
91use vm_memory::GuestAddress;
92
93use crate::crosvm::config::PmemOption;
94use crate::crosvm::config::VhostUserFrontendOption;
95use crate::crosvm::sys::config::PmemExt2Option;
96
/// All the tube types collected and passed to `run_control`.
///
/// This mainly exists to simplify the device setup plumbing. We collect the tubes of all the
/// devices into one list using this enum and then separate them out in `run_control` to be handled
/// individually.
#[remain::sorted]
pub enum AnyControlTube {
    /// A tube that initiates requests to a device; see `DeviceControlTube`.
    DeviceControlTube(DeviceControlTube),
    /// Receives `IrqHandlerRequest`.
    IrqTube(Tube),
    /// A tube that services requests coming from a device; see `TaggedControlTube`.
    TaggedControlTube(TaggedControlTube),
    /// A tube that services `VmMemoryRequest` requests; see `VmMemoryTube`.
    VmMemoryTube(VmMemoryTube),
}
110
111impl From<DeviceControlTube> for AnyControlTube {
112    fn from(value: DeviceControlTube) -> Self {
113        AnyControlTube::DeviceControlTube(value)
114    }
115}
116
117impl From<TaggedControlTube> for AnyControlTube {
118    fn from(value: TaggedControlTube) -> Self {
119        AnyControlTube::TaggedControlTube(value)
120    }
121}
122
123impl From<VmMemoryTube> for AnyControlTube {
124    fn from(value: VmMemoryTube) -> Self {
125        AnyControlTube::VmMemoryTube(value)
126    }
127}
128
/// Tubes that initiate requests to devices.
///
/// Each variant wraps the `Tube` used to reach one kind of device; most variants only exist when
/// the corresponding cargo feature is enabled.
#[remain::sorted]
pub enum DeviceControlTube {
    /// See `BalloonTube`.
    #[cfg(feature = "balloon")]
    Balloon(Tube),
    /// Sends `DiskControlCommand`.
    Disk(Tube),
    /// Sends `GpuControlCommand`.
    #[cfg(feature = "gpu")]
    Gpu(Tube),
    /// Sends `PvClockCommand`.
    #[cfg(feature = "pvclock")]
    PvClock(Tube),
    // NOTE(review): presumably the control tube of the virtio sound device; the command type sent
    // over it is not visible in this file -- confirm.
    #[cfg(feature = "audio")]
    Snd(Tube),
}
146
/// Tubes that service requests from devices.
///
/// Only includes those that happen to be handled together in the main `WaitContext` loop.
///
/// Every variant wraps the `Tube` on which the named request type is received.
pub enum TaggedControlTube {
    /// Receives `FsMappingRequest`.
    Fs(Tube),
    /// Receives `VmRequest`.
    Vm(Tube),
    /// Receives `VmMemoryMappingRequest`.
    VmMsync(Tube),
}
158
159impl AsRef<Tube> for TaggedControlTube {
160    fn as_ref(&self) -> &Tube {
161        use self::TaggedControlTube::*;
162        match &self {
163            Fs(tube) | Vm(tube) | VmMsync(tube) => tube,
164        }
165    }
166}
167
168impl AsRawDescriptor for TaggedControlTube {
169    fn as_raw_descriptor(&self) -> RawDescriptor {
170        self.as_ref().as_raw_descriptor()
171    }
172}
173
174impl ReadNotifier for TaggedControlTube {
175    fn get_read_notifier(&self) -> &dyn AsRawDescriptor {
176        self.as_ref().get_read_notifier()
177    }
178}
179
/// Tubes that service `VmMemoryRequest` requests from devices.
///
/// The type is serializable (serde `Serialize`/`Deserialize` are derived).
#[derive(serde::Serialize, serde::Deserialize)]
pub struct VmMemoryTube {
    /// The tube on which the requests arrive.
    pub tube: Tube,
    /// See devices::virtio::VirtioDevice.expose_shared_memory_region_with_viommu
    pub expose_with_viommu: bool,
}
187
188impl AsRef<Tube> for VmMemoryTube {
189    fn as_ref(&self) -> &Tube {
190        &self.tube
191    }
192}
193
194impl AsRawDescriptor for VmMemoryTube {
195    fn as_raw_descriptor(&self) -> RawDescriptor {
196        self.as_ref().as_raw_descriptor()
197    }
198}
199
200impl ReadNotifier for VmMemoryTube {
201    fn get_read_notifier(&self) -> &dyn AsRawDescriptor {
202        self.as_ref().get_read_notifier()
203    }
204}
205
/// Conversion of a value into a `UnixStream`.
///
/// In this file it is implemented for `&Path`, `&PathBuf` and `UnixStream` itself, which lets the
/// input device constructors accept either a path or an already opened stream.
pub trait IntoUnixStream {
    /// Consumes `self` and returns the resulting stream, or an error if it cannot be obtained.
    fn into_unix_stream(self) -> Result<UnixStream>;
}
209
210impl IntoUnixStream for &Path {
211    fn into_unix_stream(self) -> Result<UnixStream> {
212        if let Some(fd) = safe_descriptor_from_path(self)
213            .with_context(|| format!("failed to open event device '{}'", self.display()))?
214        {
215            Ok(fd.into())
216        } else {
217            UnixStream::connect(self)
218                .with_context(|| format!("failed to open event device '{}'", self.display()))
219        }
220    }
221}
222
223impl IntoUnixStream for &PathBuf {
224    fn into_unix_stream(self) -> Result<UnixStream> {
225        self.as_path().into_unix_stream()
226    }
227}
228
/// Identity conversion: an already opened stream is returned unchanged.
impl IntoUnixStream for UnixStream {
    fn into_unix_stream(self) -> Result<UnixStream> {
        // Nothing to open; this never fails.
        Ok(self)
    }
}
234
/// Result type returned by the device creation helpers in this file.
///
/// The success type defaults to `VirtioDeviceStub`.
pub type DeviceResult<T = VirtioDeviceStub> = Result<T>;
236
237/// A trait for spawning virtio device instances and jails from their configuration structure.
238///
239/// Implementors become able to create virtio devices and jails following their own configuration.
240/// This trait also provides a few convenience methods for e.g. creating a virtio device and jail
241/// at once.
242pub trait VirtioDeviceBuilder: Sized {
243    /// Base name of the device, as it will appear in logs.
244    const NAME: &'static str;
245
246    /// Create a regular virtio device from the configuration and `protection_type` setting.
247    fn create_virtio_device(
248        self,
249        protection_type: ProtectionType,
250    ) -> anyhow::Result<Box<dyn VirtioDevice>>;
251
252    /// Create a device suitable for being run as a vhost-user instance.
253    ///
254    /// It is ok to leave this method unimplemented if the device is not intended to be used with
255    /// vhost-user.
256    fn create_vhost_user_device(
257        self,
258        _keep_rds: &mut Vec<RawDescriptor>,
259    ) -> anyhow::Result<Box<dyn VhostUserDeviceBuilder>> {
260        unimplemented!()
261    }
262
263    /// Create a jail that is suitable to run a device.
264    ///
265    /// The default implementation creates a simple jail with a seccomp policy derived from the
266    /// base name of the device.
267    fn create_jail(
268        &self,
269        jail_config: Option<&JailConfig>,
270        virtio_transport: VirtioDeviceType,
271    ) -> anyhow::Result<Option<Minijail>> {
272        simple_jail(
273            jail_config,
274            &virtio_transport.seccomp_policy_file(Self::NAME),
275        )
276    }
277
278    /// Helper method to return a `VirtioDeviceStub` filled using `create_virtio_device` and
279    /// `create_jail`.
280    ///
281    /// This helper should cover the needs of most devices when run as regular virtio devices.
282    fn create_virtio_device_and_jail(
283        self,
284        protection_type: ProtectionType,
285        jail_config: Option<&JailConfig>,
286    ) -> DeviceResult {
287        let jail = self.create_jail(jail_config, VirtioDeviceType::Regular)?;
288        let dev = self.create_virtio_device(protection_type)?;
289        Ok(VirtioDeviceStub { dev, jail })
290    }
291}
292
/// A one-shot configuration structure for implementing `VirtioDeviceBuilder`. We cannot do it on
/// `DiskOption` directly because disk devices can be passed an optional control tube.
///
/// "One-shot" because the builder methods take `self` by value, which moves the tube out.
pub struct DiskConfig<'a> {
    /// Options for disk creation.
    disk: &'a DiskOption,
    /// Optional control tube for the device.
    device_tube: Option<Tube>,
}
301
302impl<'a> DiskConfig<'a> {
303    pub fn new(disk: &'a DiskOption, device_tube: Option<Tube>) -> Self {
304        Self { disk, device_tube }
305    }
306}
307
308impl VirtioDeviceBuilder for DiskConfig<'_> {
309    const NAME: &'static str = "block";
310
311    fn create_virtio_device(
312        self,
313        protection_type: ProtectionType,
314    ) -> anyhow::Result<Box<dyn VirtioDevice>> {
315        info!(
316            "Trying to attach block device: {}",
317            self.disk.path.display(),
318        );
319        let disk_image = self.disk.open()?;
320        let base_features = virtio::base_features(protection_type);
321        Ok(Box::new(
322            virtio::BlockAsync::new(
323                base_features,
324                disk_image,
325                self.disk,
326                self.device_tube,
327                None,
328                None,
329            )
330            .context("failed to create block device")?,
331        ))
332    }
333
334    fn create_vhost_user_device(
335        self,
336        keep_rds: &mut Vec<RawDescriptor>,
337    ) -> anyhow::Result<Box<dyn VhostUserDeviceBuilder>> {
338        let disk = self.disk;
339        let disk_image = disk.open()?;
340        let base_features = virtio::base_features(ProtectionType::Unprotected);
341
342        let block = Box::new(
343            virtio::BlockAsync::new(
344                base_features,
345                disk_image,
346                disk,
347                self.device_tube,
348                None,
349                None,
350            )
351            .context("failed to create block device")?,
352        );
353        keep_rds.extend(block.keep_rds());
354
355        Ok(block)
356    }
357}
358
/// Borrowed list of SCSI disk options; `VirtioDeviceBuilder` is implemented on `&ScsiConfig`.
pub struct ScsiConfig<'a>(pub &'a [ScsiOption]);
360
361impl<'a> VirtioDeviceBuilder for &'a ScsiConfig<'a> {
362    const NAME: &'static str = "scsi";
363
364    fn create_virtio_device(
365        self,
366        protection_type: ProtectionType,
367    ) -> anyhow::Result<Box<dyn VirtioDevice>> {
368        let base_features = virtio::base_features(protection_type);
369        let disks = self
370            .0
371            .iter()
372            .map(|op| {
373                info!("Trying to attach a scsi device: {}", op.path.display());
374                let file = op.open()?;
375                Ok(virtio::ScsiDiskConfig {
376                    file,
377                    block_size: op.block_size,
378                    read_only: op.read_only,
379                })
380            })
381            .collect::<anyhow::Result<_>>()?;
382        let controller = virtio::ScsiController::new(base_features, disks)
383            .context("failed to create a scsi controller")?;
384        Ok(Box::new(controller))
385    }
386}
387
388fn vhost_user_connection(
389    path: &Path,
390    connect_timeout_ms: Option<u64>,
391) -> Result<vmm_vhost::Connection> {
392    let deadline = connect_timeout_ms.map(|t| Instant::now() + Duration::from_millis(t));
393    let mut first = true;
394    loop {
395        match UnixStream::connect(path) {
396            Ok(sock) => {
397                let connection = sock
398                    .try_into()
399                    .context("failed to construct Connection from UnixStream")?;
400                return Ok(connection);
401            }
402            Err(e) => {
403                // ConnectionRefused => Might be a stale file the backend hasn't deleted yet.
404                // NotFound => Might be the backend hasn't bound the socket yet.
405                if e.kind() == ErrorKind::ConnectionRefused || e.kind() == ErrorKind::NotFound {
406                    if let Some(deadline) = deadline {
407                        if first {
408                            first = false;
409                            warn!(
410                                "vhost-user socket path {} not available. retrying up to {} ms",
411                                path.display(),
412                                connect_timeout_ms.unwrap()
413                            );
414                        }
415                        if Instant::now() > deadline {
416                            anyhow::bail!(
417                                "timeout waiting for vhost-user socket path {}: final error: {e:#}",
418                                path.display()
419                            );
420                        }
421                        std::thread::sleep(Duration::from_millis(1));
422                        continue;
423                    }
424                }
425                return Err(e).with_context(|| {
426                    format!(
427                        "failed to connect to vhost-user socket path {}",
428                        path.display()
429                    )
430                });
431            }
432        }
433    }
434}
435
436pub fn create_vhost_user_frontend(
437    protection_type: ProtectionType,
438    opt: &VhostUserFrontendOption,
439    connect_timeout_ms: Option<u64>,
440    vm_evt_wrtube: base::SendTube,
441) -> DeviceResult {
442    let connection = if let Some(socket_fd) = safe_descriptor_from_path(&opt.socket)? {
443        socket_fd
444            .try_into()
445            .context("failed to create vhost-user connection from fd")?
446    } else {
447        vhost_user_connection(&opt.socket, connect_timeout_ms)?
448    };
449    let dev = VhostUserFrontend::new(
450        opt.type_,
451        virtio::base_features(protection_type),
452        connection,
453        vm_evt_wrtube,
454        opt.max_queue_size,
455        opt.pci_address,
456    )
457    .context("failed to set up vhost-user frontend")?;
458
459    Ok(VirtioDeviceStub {
460        dev: Box::new(dev),
461        // no sandbox here because virtqueue handling is exported to a different process.
462        jail: None,
463    })
464}
465
466pub fn create_virtio_rng_device(
467    protection_type: ProtectionType,
468    jail_config: Option<&JailConfig>,
469) -> DeviceResult {
470    let dev =
471        virtio::Rng::new(virtio::base_features(protection_type)).context("failed to set up rng")?;
472
473    Ok(VirtioDeviceStub {
474        dev: Box::new(dev),
475        jail: simple_jail(jail_config, "rng_device")?,
476    })
477}
478
/// Creates the virtio sound device and, when `jail_config` is given, its sandbox.
///
/// The seccomp policy is selected from the stream source backend found in `snd_params`. With the
/// CRAS backend (feature `audio_cras`), bind mounts are enabled and `/run/cras` is bind-mounted
/// into the jail. The jailed process always runs as the current user.
///
/// # Errors
///
/// Fails if the device cannot be created or if the jail cannot be set up.
#[cfg(feature = "audio")]
pub fn create_virtio_snd_device(
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
    snd_params: SndParameters,
    snd_device_tube: Tube,
) -> DeviceResult {
    // Read the backend before `snd_params` is moved into the device.
    let backend = snd_params.backend;
    let dev = virtio::snd::common_backend::VirtioSnd::new(
        virtio::base_features(protection_type),
        snd_params,
        snd_device_tube,
    )
    // The device is built for every backend, not only CRAS, so the message stays generic.
    .context("failed to create virtio sound device")?;

    use virtio::snd::parameters::StreamSourceBackend as Backend;

    let policy = match backend {
        Backend::NULL | Backend::FILE => "snd_null_device",
        #[cfg(feature = "audio_aaudio")]
        Backend::Sys(virtio::snd::sys::StreamSourceBackend::AAUDIO) => "snd_aaudio_device",
        #[cfg(feature = "audio_cras")]
        Backend::Sys(virtio::snd::sys::StreamSourceBackend::CRAS) => "snd_cras_device",
        #[cfg(not(any(feature = "audio_cras", feature = "audio_aaudio")))]
        _ => unreachable!(),
    };

    let jail = if let Some(jail_config) = jail_config {
        let mut config = SandboxConfig::new(jail_config, policy);
        #[cfg(feature = "audio_cras")]
        if backend == Backend::Sys(virtio::snd::sys::StreamSourceBackend::CRAS) {
            config.bind_mounts = true;
        }
        // TODO(b/267574679): running as current_user may not be required for snd device.
        config.run_as = RunAsUser::CurrentUser;
        // `jail` is only mutated when the `audio_cras` feature is enabled.
        #[allow(unused_mut)]
        let mut jail =
            create_sandbox_minijail(&jail_config.pivot_root, MAX_OPEN_FILES_DEFAULT, &config)?;
        #[cfg(feature = "audio_cras")]
        if backend == Backend::Sys(virtio::snd::sys::StreamSourceBackend::CRAS) {
            let run_cras_path = Path::new("/run/cras");
            jail.mount_bind(run_cras_path, run_cras_path, true)?;
        }
        Some(jail)
    } else {
        None
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
532
/// Creates a virtio TPM device backed by a `VtpmProxy`, with its sandbox when requested.
///
/// The jail uses the `vtpm_proxy_device` policy, has bind mounts enabled and gets
/// `/run/dbus/system_bus_socket` bind-mounted into it.
#[cfg(feature = "vtpm")]
pub fn create_vtpm_proxy_device(
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
) -> DeviceResult {
    let jail = match jail_config {
        Some(jail_config) => {
            let mut sandbox = SandboxConfig::new(jail_config, "vtpm_proxy_device");
            sandbox.bind_mounts = true;
            let mut minijail =
                create_sandbox_minijail(&jail_config.pivot_root, MAX_OPEN_FILES_DEFAULT, &sandbox)?;
            let dbus_socket = Path::new("/run/dbus/system_bus_socket");
            minijail.mount_bind(dbus_socket, dbus_socket, true)?;
            Some(minijail)
        }
        None => None,
    };

    let proxy = Box::new(VtpmProxy::new());
    let tpm = virtio::Tpm::new(proxy, virtio::base_features(protection_type));

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail,
    })
}
558
559pub fn create_single_touch_device<T: IntoUnixStream>(
560    protection_type: ProtectionType,
561    jail_config: Option<&JailConfig>,
562    single_touch_socket: T,
563    width: u32,
564    height: u32,
565    name: Option<&str>,
566    idx: u32,
567) -> DeviceResult {
568    let socket = single_touch_socket
569        .into_unix_stream()
570        .context("failed configuring virtio single touch")?;
571
572    let dev = virtio::input::new_single_touch(
573        idx,
574        socket,
575        width,
576        height,
577        name,
578        virtio::base_features(protection_type),
579    )
580    .context("failed to set up input device")?;
581    Ok(VirtioDeviceStub {
582        dev: Box::new(dev),
583        jail: simple_jail(jail_config, "input_device")?,
584    })
585}
586
587pub fn create_multi_touch_device<T: IntoUnixStream>(
588    protection_type: ProtectionType,
589    jail_config: Option<&JailConfig>,
590    multi_touch_socket: T,
591    width: u32,
592    height: u32,
593    name: Option<&str>,
594    idx: u32,
595) -> DeviceResult {
596    let socket = multi_touch_socket
597        .into_unix_stream()
598        .context("failed configuring virtio multi touch")?;
599
600    let dev = virtio::input::new_multi_touch(
601        idx,
602        socket,
603        width,
604        height,
605        name,
606        virtio::base_features(protection_type),
607    )
608    .context("failed to set up input device")?;
609
610    Ok(VirtioDeviceStub {
611        dev: Box::new(dev),
612        jail: simple_jail(jail_config, "input_device")?,
613    })
614}
615
616pub fn create_trackpad_device<T: IntoUnixStream>(
617    protection_type: ProtectionType,
618    jail_config: Option<&JailConfig>,
619    trackpad_socket: T,
620    width: u32,
621    height: u32,
622    name: Option<&str>,
623    idx: u32,
624) -> DeviceResult {
625    let socket = trackpad_socket
626        .into_unix_stream()
627        .context("failed configuring virtio trackpad")?;
628
629    let dev = virtio::input::new_trackpad(
630        idx,
631        socket,
632        width,
633        height,
634        name,
635        virtio::base_features(protection_type),
636    )
637    .context("failed to set up input device")?;
638
639    Ok(VirtioDeviceStub {
640        dev: Box::new(dev),
641        jail: simple_jail(jail_config, "input_device")?,
642    })
643}
644
645pub fn create_multitouch_trackpad_device<T: IntoUnixStream>(
646    protection_type: ProtectionType,
647    jail_config: Option<&JailConfig>,
648    trackpad_socket: T,
649    width: u32,
650    height: u32,
651    name: Option<&str>,
652    idx: u32,
653) -> DeviceResult {
654    let socket = trackpad_socket
655        .into_unix_stream()
656        .context("failed configuring virtio trackpad")?;
657
658    let dev = virtio::input::new_multitouch_trackpad(
659        idx,
660        socket,
661        width,
662        height,
663        name,
664        virtio::base_features(protection_type),
665    )
666    .context("failed to set up input device")?;
667
668    Ok(VirtioDeviceStub {
669        dev: Box::new(dev),
670        jail: simple_jail(jail_config, "input_device")?,
671    })
672}
673
674pub fn create_mouse_device<T: IntoUnixStream>(
675    protection_type: ProtectionType,
676    jail_config: Option<&JailConfig>,
677    mouse_socket: T,
678    idx: u32,
679) -> DeviceResult {
680    let socket = mouse_socket
681        .into_unix_stream()
682        .context("failed configuring virtio mouse")?;
683
684    let dev = virtio::input::new_mouse(idx, socket, virtio::base_features(protection_type))
685        .context("failed to set up input device")?;
686
687    Ok(VirtioDeviceStub {
688        dev: Box::new(dev),
689        jail: simple_jail(jail_config, "input_device")?,
690    })
691}
692
693pub fn create_keyboard_device<T: IntoUnixStream>(
694    protection_type: ProtectionType,
695    jail_config: Option<&JailConfig>,
696    keyboard_socket: T,
697    idx: u32,
698) -> DeviceResult {
699    let socket = keyboard_socket
700        .into_unix_stream()
701        .context("failed configuring virtio keyboard")?;
702
703    let dev = virtio::input::new_keyboard(idx, socket, virtio::base_features(protection_type))
704        .context("failed to set up input device")?;
705
706    Ok(VirtioDeviceStub {
707        dev: Box::new(dev),
708        jail: simple_jail(jail_config, "input_device")?,
709    })
710}
711
712pub fn create_switches_device<T: IntoUnixStream>(
713    protection_type: ProtectionType,
714    jail_config: Option<&JailConfig>,
715    switches_socket: T,
716    idx: u32,
717) -> DeviceResult {
718    let socket = switches_socket
719        .into_unix_stream()
720        .context("failed configuring virtio switches")?;
721
722    let dev = virtio::input::new_switches(idx, socket, virtio::base_features(protection_type))
723        .context("failed to set up input device")?;
724
725    Ok(VirtioDeviceStub {
726        dev: Box::new(dev),
727        jail: simple_jail(jail_config, "input_device")?,
728    })
729}
730
731pub fn create_rotary_device<T: IntoUnixStream>(
732    protection_type: ProtectionType,
733    jail_config: Option<&JailConfig>,
734    rotary_socket: T,
735    idx: u32,
736) -> DeviceResult {
737    let socket = rotary_socket
738        .into_unix_stream()
739        .context("failed configuring virtio rotary")?;
740
741    let dev = virtio::input::new_rotary(idx, socket, virtio::base_features(protection_type))
742        .context("failed to set up input device")?;
743
744    Ok(VirtioDeviceStub {
745        dev: Box::new(dev),
746        jail: simple_jail(jail_config, "input_device")?,
747    })
748}
749
750pub fn create_vinput_device(
751    protection_type: ProtectionType,
752    jail_config: Option<&JailConfig>,
753    dev_path: &Path,
754) -> DeviceResult {
755    let dev_file = OpenOptions::new()
756        .read(true)
757        .write(true)
758        .open(dev_path)
759        .with_context(|| format!("failed to open vinput device {}", dev_path.display()))?;
760
761    let dev = virtio::input::new_evdev(dev_file, virtio::base_features(protection_type))
762        .context("failed to set up input device")?;
763
764    Ok(VirtioDeviceStub {
765        dev: Box::new(dev),
766        jail: simple_jail(jail_config, "input_device")?,
767    })
768}
769
770pub fn create_custom_device<T: IntoUnixStream>(
771    protection_type: ProtectionType,
772    jail_config: Option<&JailConfig>,
773    custom_device_socket: T,
774    idx: u32,
775    input_config_path: PathBuf,
776) -> DeviceResult {
777    let socket = custom_device_socket
778        .into_unix_stream()
779        .context("failed configuring custom virtio input device")?;
780
781    let dev = virtio::input::new_custom(
782        idx,
783        socket,
784        input_config_path,
785        virtio::base_features(protection_type),
786    )
787    .context("failed to set up input device")?;
788
789    Ok(VirtioDeviceStub {
790        dev: Box::new(dev),
791        jail: simple_jail(jail_config, "input_device")?,
792    })
793}
794
/// Creates the virtio balloon device together with its jail (policy `balloon_device`).
///
/// Every argument besides `protection_type` and `jail_config` is handed to
/// `virtio::Balloon::new`; `registered_evt_q` only exists with the `registered_events` feature.
#[cfg(feature = "balloon")]
pub fn create_balloon_device(
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
    tube: Tube,
    inflate_tube: Option<Tube>,
    init_balloon_size: u64,
    vm_memory_client: VmMemoryClient,
    enabled_features: u64,
    #[cfg(feature = "registered_events")] registered_evt_q: Option<SendTube>,
    ws_num_bins: u8,
) -> DeviceResult {
    let base_features = virtio::base_features(protection_type);
    let balloon = virtio::Balloon::new(
        base_features,
        tube,
        vm_memory_client,
        inflate_tube,
        init_balloon_size,
        enabled_features,
        #[cfg(feature = "registered_events")]
        registered_evt_q,
        ws_num_bins,
    )
    .context("failed to create balloon")?;
    let jail = simple_jail(jail_config, "balloon_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(balloon),
        jail,
    })
}
825
/// Creates the pvclock device together with its jail (policy `pvclock_device`).
///
/// `tsc_frequency` and `suspend_tube` are handed to `PvClock::new`.
#[cfg(feature = "pvclock")]
pub fn create_pvclock_device(
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
    tsc_frequency: u64,
    suspend_tube: Tube,
) -> DeviceResult {
    let features = virtio::base_features(protection_type);
    let pvclock = PvClock::new(features, tsc_frequency, suspend_tube);
    let jail = simple_jail(jail_config, "pvclock_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(pvclock),
        jail,
    })
}
844
/// Builds virtio network devices (regular, vhost-net and vhost-user) from `NetParameters`.
#[cfg(feature = "net")]
impl VirtioDeviceBuilder for &NetParameters {
    const NAME: &'static str = "net";

    /// Creates the tap interface and then either a vhost-net device (when `vhost_net` is set) or
    /// a plain virtio-net device.
    ///
    /// A multi-queue tap is only requested for more than one queue pair without vhost-net.
    fn create_virtio_device(
        self,
        protection_type: ProtectionType,
    ) -> anyhow::Result<Box<dyn VirtioDevice>> {
        let vq_pairs = self.vq_pairs.unwrap_or(1);
        let multi_vq = self.vhost_net.is_none() && vq_pairs > 1;

        let features = virtio::base_features(protection_type);
        let (tap, mac) = create_tap_for_net_device(&self.mode, multi_vq)?;

        let dev: Box<dyn VirtioDevice> = match &self.vhost_net {
            Some(vhost_net) => Box::new(
                virtio::vhost::Net::<_, vhost::Net<_>>::new(
                    &vhost_net.device,
                    features,
                    tap,
                    mac,
                    self.packed_queue,
                    self.pci_address,
                    self.mrg_rxbuf,
                )
                .context("failed to set up virtio-vhost networking")?,
            ),
            None => Box::new(
                virtio::Net::new(
                    features,
                    tap,
                    vq_pairs,
                    mac,
                    self.packed_queue,
                    self.pci_address,
                    self.mrg_rxbuf,
                )
                .context("failed to set up virtio networking")?,
            ),
        };
        Ok(dev)
    }

    /// Creates a simple jail whose policy name is `vhost_net` when vhost-net is in use and `net`
    /// otherwise.
    fn create_jail(
        &self,
        jail_config: Option<&JailConfig>,
        virtio_transport: VirtioDeviceType,
    ) -> anyhow::Result<Option<Minijail>> {
        let policy_name = match &self.vhost_net {
            Some(_) => "vhost_net",
            None => "net",
        };
        let policy_file = virtio_transport.seccomp_policy_file(policy_name);

        simple_jail(jail_config, &policy_file)
    }

    /// Creates the tap interface and wraps it in a vhost-user `NetBackend`.
    ///
    /// The descriptors of the backend are appended to `keep_rds`. The MAC address returned with
    /// the tap is not used here.
    fn create_vhost_user_device(
        self,
        keep_rds: &mut Vec<RawDescriptor>,
    ) -> anyhow::Result<Box<dyn VhostUserDeviceBuilder>> {
        let vq_pairs = self.vq_pairs.unwrap_or(1);
        let multi_vq = self.vhost_net.is_none() && vq_pairs > 1;
        let (tap, _mac) = create_tap_for_net_device(&self.mode, multi_vq)?;

        let net_backend = NetBackend::new(tap, self.mrg_rxbuf)?;
        keep_rds.extend(net_backend.as_raw_descriptors());

        Ok(Box::new(net_backend))
    }
}
917
/// Create a new tap interface based on NetParametersMode.
///
/// Returns the tap together with the MAC address carried by the mode, if any. In the `RawConfig`
/// mode the MAC address is applied to the tap itself and `None` is returned.
#[cfg(feature = "net")]
fn create_tap_for_net_device(
    mode: &NetParametersMode,
    multi_vq: bool,
) -> DeviceResult<(Tap, Option<MacAddress>)> {
    let tap_and_mac = match mode {
        NetParametersMode::TapName { tap_name, mac } => {
            let tap = Tap::new_with_name(tap_name.as_bytes(), true, multi_vq)
                .map_err(NetError::TapOpen)?;
            (tap, *mac)
        }
        NetParametersMode::TapFd { tap_fd, mac } => {
            let descriptor =
                validate_raw_descriptor(*tap_fd).context("failed to validate tap descriptor")?;
            // SAFETY:
            // Safe because we ensure that we get a unique handle to the fd.
            let tap = unsafe { Tap::from_raw_descriptor(descriptor) }
                .context("failed to create tap device")?;
            (tap, *mac)
        }
        NetParametersMode::RawConfig {
            host_ip,
            netmask,
            mac,
        } => {
            let tap = Tap::new(true, multi_vq).map_err(NetError::TapOpen)?;
            tap.set_ip_addr(*host_ip).map_err(NetError::TapSetIp)?;
            tap.set_netmask(*netmask).map_err(NetError::TapSetNetmask)?;
            tap.set_mac_address(*mac)
                .map_err(NetError::TapSetMacAddress)?;
            tap.enable().map_err(NetError::TapEnable)?;
            (tap, None)
        }
    };
    Ok(tap_and_mac)
}
957
958pub fn create_wayland_device(
959    protection_type: ProtectionType,
960    jail_config: Option<&JailConfig>,
961    wayland_socket_paths: &BTreeMap<String, PathBuf>,
962    resource_bridge: Option<Tube>,
963) -> DeviceResult {
964    let wayland_socket_dirs = wayland_socket_paths
965        .values()
966        .map(|path| path.parent())
967        .collect::<Option<Vec<_>>>()
968        .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;
969
970    let features = virtio::base_features(protection_type);
971    let dev = virtio::Wl::new(features, wayland_socket_paths.clone(), resource_bridge)
972        .context("failed to create wayland device")?;
973
974    let jail = if let Some(jail_config) = jail_config {
975        let mut config = SandboxConfig::new(jail_config, "wl_device");
976        config.bind_mounts = true;
977        let mut jail = create_gpu_minijail(
978            &jail_config.pivot_root,
979            &config,
980            /* render_node_only= */ false,
981            /* snapshot_scratch_path= */ None,
982        )?;
983        // Bind mount the wayland socket's directory into jail's root. This is necessary since
984        // each new wayland context must open() the socket. If the wayland socket is ever
985        // destroyed and remade in the same host directory, new connections will be possible
986        // without restarting the wayland device.
987        for dir in &wayland_socket_dirs {
988            jail.mount(dir, dir, "", (libc::MS_BIND | libc::MS_REC) as usize)?;
989        }
990
991        Some(jail)
992    } else {
993        None
994    };
995
996    Ok(VirtioDeviceStub {
997        dev: Box::new(dev),
998        jail,
999    })
1000}
1001
/// Builds the minijail used to sandbox a virtio video device.
///
/// The jail is created from the `video_device` sandbox configuration with bind mounts enabled.
/// The DRM device is bind mounted only for backends that need it; the `/sys` and library mounts
/// are added on x86_64 builds only, and the remaining mounts are added unconditionally.
///
/// # Panics
///
/// Panics through `unreachable!` if `typ` is a device type whose feature was compiled out.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device_jail(
    backend: VideoBackendType,
    jail_config: &JailConfig,
    typ: VideoDeviceType,
) -> Result<Minijail> {
    match typ {
        #[cfg(feature = "video-decoder")]
        VideoDeviceType::Decoder => {}
        #[cfg(feature = "video-encoder")]
        VideoDeviceType::Encoder => {}
        #[cfg(any(not(feature = "video-decoder"), not(feature = "video-encoder")))]
        // `typ` is always a VideoDeviceType enabled
        device_type => unreachable!("Not compiled with {:?} enabled", device_type),
    };

    let mut sandbox = SandboxConfig::new(jail_config, "video_device");
    sandbox.bind_mounts = true;
    let mut video_jail =
        create_sandbox_minijail(&jail_config.pivot_root, MAX_OPEN_FILES_DEFAULT, &sandbox)?;

    let backend_uses_drm = match backend {
        #[cfg(any(feature = "libvda", feature = "libvda-stub"))]
        VideoBackendType::Libvda => true,
        #[cfg(any(feature = "libvda", feature = "libvda-stub"))]
        VideoBackendType::LibvdaVd => true,
        #[cfg(feature = "vaapi")]
        VideoBackendType::Vaapi => true,
        #[cfg(feature = "ffmpeg")]
        VideoBackendType::Ffmpeg => false,
    };
    if backend_uses_drm {
        jail_mount_bind_drm(&mut video_jail, /* render_node_only= */ true)?;
    }

    #[cfg(target_arch = "x86_64")]
    {
        // Device nodes used by libdrm through minigbm in libvda on AMD devices.
        for sysfs_dir in ["/sys/dev/char", "/sys/devices"] {
            let sysfs_dir = Path::new(sysfs_dir);
            video_jail.mount_bind(sysfs_dir, sysfs_dir, false)?;
        }

        // Required for loading dri or vulkan libraries loaded by minigbm on AMD devices.
        jail_mount_bind_if_exists(
            &mut video_jail,
            &["/usr/lib64", "/usr/lib", "/usr/share/vulkan"],
        )?;
    }

    // Device nodes required by libchrome which establishes Mojo connection in libvda. Only the
    // D-Bus system socket is mounted writable.
    for (node, writable) in [
        ("/dev/urandom", false),
        ("/run/dbus/system_bus_socket", true),
    ] {
        let node = Path::new(node);
        video_jail.mount_bind(node, node, writable)?;
    }

    Ok(video_jail)
}
1057
/// Creates a virtio video device of kind `typ` using `backend`.
///
/// When `jail_config` is given the jail is built first, so a jail failure is reported before the
/// device is constructed.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
pub fn create_video_device(
    backend: VideoBackendType,
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
    typ: VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let jail = jail_config
        .map(|config| create_video_device_jail(backend, config, typ))
        .transpose()?;

    let video_dev = devices::virtio::VideoDevice::new(
        virtio::base_features(protection_type),
        typ,
        backend,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video_dev),
        jail,
    })
}
1082
/// Creates a virtio video device and appends it to `devs`.
///
/// `video_tube` is handed to the device as its resource bridge. Nothing is pushed if device
/// creation fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
pub fn register_video_device(
    backend: VideoBackendType,
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
    typ: VideoDeviceType,
) -> Result<()> {
    let video_stub = create_video_device(backend, protection_type, jail_config, typ, video_tube)?;
    devs.push(video_stub);
    Ok(())
}
1101
/// Creates the simple-capture virtio-media device; it is never jailed.
#[cfg(feature = "media")]
pub fn create_simple_media_device(protection_type: ProtectionType) -> DeviceResult {
    use devices::virtio::media::create_virtio_media_simple_capture_device;

    Ok(VirtioDeviceStub {
        dev: create_virtio_media_simple_capture_device(virtio::base_features(protection_type)),
        jail: None,
    })
}
1111
/// Creates a virtio-media V4L2 proxy device for the device node at `path`; it is never jailed.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[cfg(feature = "media")]
pub fn create_v4l2_device<P: AsRef<Path>>(
    protection_type: ProtectionType,
    path: P,
) -> DeviceResult {
    use devices::virtio::media::create_virtio_media_v4l2_proxy_device;

    let proxy_dev =
        create_virtio_media_v4l2_proxy_device(virtio::base_features(protection_type), path)?;

    Ok(VirtioDeviceStub {
        dev: proxy_dev,
        jail: None,
    })
}
1125
/// Creates the virtio-media decoder adapter device for `backend`.
///
/// When `jail_config` is given, the device runs in the video device jail built for a decoder.
#[cfg(all(feature = "media", feature = "video-decoder"))]
pub fn create_virtio_media_adapter(
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
    tube: Tube,
    backend: VideoBackendType,
) -> DeviceResult {
    use devices::virtio::media::create_virtio_media_decoder_adapter_device;

    // The jail is set up before the device so that jail errors surface first.
    let jail = match jail_config {
        Some(config) => Some(create_video_device_jail(
            backend,
            config,
            VideoDeviceType::Decoder,
        )?),
        None => None,
    };

    let adapter_dev = create_virtio_media_decoder_adapter_device(
        virtio::base_features(protection_type),
        tube,
        backend,
    )?;

    Ok(VirtioDeviceStub {
        dev: adapter_dev,
        jail,
    })
}
1150
1151impl VirtioDeviceBuilder for &VsockConfig {
1152    const NAME: &'static str = "vhost_vsock";
1153
1154    fn create_virtio_device(
1155        self,
1156        protection_type: ProtectionType,
1157    ) -> anyhow::Result<Box<dyn VirtioDevice>> {
1158        let features = virtio::base_features(protection_type);
1159
1160        let dev = virtio::vhost::Vsock::new(features, self)
1161            .context("failed to set up virtual socket device")?;
1162
1163        Ok(Box::new(dev))
1164    }
1165
1166    fn create_vhost_user_device(
1167        self,
1168        keep_rds: &mut Vec<RawDescriptor>,
1169    ) -> anyhow::Result<Box<dyn VhostUserDeviceBuilder>> {
1170        if self.max_queue_sizes.is_some() {
1171            bail!("vhost-user vsock doesn't support max-queue-sizes option");
1172        }
1173
1174        let vsock_device = VhostUserVsockDevice::new(self.cid, &self.vhost_device)?;
1175
1176        keep_rds.push(vsock_device.as_raw_descriptor());
1177
1178        Ok(Box::new(vsock_device))
1179    }
1180}
1181
/// Creates the vhost SCMI device backed by the device node at `vhost_scmi_dev_path`.
///
/// The device is placed in the `vhost_scmi_device` simple jail when `jail_config` is given.
#[cfg(target_arch = "aarch64")]
pub fn create_vhost_scmi_device(
    protected_vm: ProtectionType,
    jail_config: Option<&JailConfig>,
    vhost_scmi_dev_path: PathBuf,
) -> DeviceResult {
    let features = virtio::base_features(protected_vm);
    let scmi_dev = virtio::vhost::Scmi::new(&vhost_scmi_dev_path, features)
        .context("failed to set up vhost scmi device")?;

    let jail = simple_jail(jail_config, "vhost_scmi_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(scmi_dev),
        jail,
    })
}
1198
1199pub fn create_fs_device(
1200    protection_type: ProtectionType,
1201    jail_config: Option<&JailConfig>,
1202    ugid: (Option<u32>, Option<u32>),
1203    uid_map: &str,
1204    gid_map: &str,
1205    src: &Path,
1206    tag: &str,
1207    fs_cfg: virtio::fs::Config,
1208    device_tube: Tube,
1209) -> DeviceResult {
1210    let max_open_files = base::linux::max_open_files()
1211        .context("failed to get max number of open files")?
1212        .rlim_max;
1213    let j = if let Some(jail_config) = jail_config {
1214        let mut config = SandboxConfig::new(jail_config, "fs_device");
1215        config.limit_caps = false;
1216        config.ugid_map = Some((uid_map, gid_map));
1217        // We want bind mounts from the parent namespaces to propagate into the fs device's
1218        // namespace.
1219        config.remount_mode = Some(libc::MS_SLAVE);
1220        config.run_as = if ugid == (None, None) {
1221            RunAsUser::Unspecified
1222        } else {
1223            RunAsUser::Specified(ugid.0.unwrap_or(0), ugid.1.unwrap_or(0))
1224        };
1225        create_sandbox_minijail(src, max_open_files, &config)?
1226    } else {
1227        create_base_minijail(src, max_open_files)?
1228    };
1229
1230    let features = virtio::base_features(protection_type);
1231    // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1232    // when num_queues > 1.
1233    let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube)
1234        .context("failed to create fs device")?;
1235
1236    Ok(VirtioDeviceStub {
1237        dev: Box::new(dev),
1238        jail: Some(j),
1239    })
1240}
1241
1242pub fn create_9p_device(
1243    protection_type: ProtectionType,
1244    jail_config: Option<&JailConfig>,
1245    ugid: (Option<u32>, Option<u32>),
1246    uid_map: &str,
1247    gid_map: &str,
1248    src: &Path,
1249    tag: &str,
1250    mut p9_cfg: p9::Config,
1251) -> DeviceResult {
1252    let max_open_files = base::linux::max_open_files()
1253        .context("failed to get max number of open files")?
1254        .rlim_max;
1255    let (jail, root) = if let Some(jail_config) = jail_config {
1256        let mut config = SandboxConfig::new(jail_config, "9p_device");
1257        config.limit_caps = false;
1258        config.ugid_map = Some((uid_map, gid_map));
1259        // We want bind mounts from the parent namespaces to propagate into the 9p server's
1260        // namespace.
1261        config.remount_mode = Some(libc::MS_SLAVE);
1262        config.run_as = if ugid == (None, None) {
1263            RunAsUser::Unspecified
1264        } else {
1265            RunAsUser::Specified(ugid.0.unwrap_or(0), ugid.1.unwrap_or(0))
1266        };
1267        let jail = create_sandbox_minijail(src, max_open_files, &config)?;
1268
1269        //  The shared directory becomes the root of the device's file system.
1270        let root = Path::new("/");
1271        (Some(jail), root)
1272    } else {
1273        // There's no mount namespace so we tell the server to treat the source directory as the
1274        // root.
1275        (None, src)
1276    };
1277
1278    let features = virtio::base_features(protection_type);
1279    p9_cfg.root = root.into();
1280    let dev = virtio::P9::new(features, tag, p9_cfg).context("failed to create 9p device")?;
1281
1282    Ok(VirtioDeviceStub {
1283        dev: Box::new(dev),
1284        jail,
1285    })
1286}
1287
1288pub fn create_pmem_device(
1289    protection_type: ProtectionType,
1290    jail_config: Option<&JailConfig>,
1291    vm: &mut impl Vm,
1292    resources: &mut SystemAllocator,
1293    pmem: &PmemOption,
1294    index: usize,
1295    pmem_device_tube: Tube,
1296) -> DeviceResult {
1297    let (fd, disk_size) = match pmem.vma_size {
1298        None => {
1299            let disk_image =
1300                open_file_or_duplicate(&pmem.path, OpenOptions::new().read(true).write(!pmem.ro))
1301                    .with_context(|| format!("failed to load disk image {}", pmem.path.display()))?;
1302            let metadata = std::fs::metadata(&pmem.path).with_context(|| {
1303                format!("failed to get disk image {} metadata", pmem.path.display())
1304            })?;
1305            (disk_image, metadata.len())
1306        }
1307        Some(size) => {
1308            let anon_file =
1309                create_anonymous_file(&pmem.path, size).context("failed to create anon file")?;
1310            (anon_file, size)
1311        }
1312    };
1313
1314    // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1315    // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1316    // we just align the size of the file to 2 MiB then access beyond the last page of the
1317    // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1318    // padding up to 2 MiB.
1319    let alignment = 2 * 1024 * 1024;
1320    let arena_size = disk_size
1321        .checked_next_multiple_of(alignment)
1322        .ok_or_else(|| anyhow!("pmem device image too big"))?;
1323
1324    let protection = {
1325        if pmem.ro {
1326            Protection::read()
1327        } else {
1328            Protection::read_write()
1329        }
1330    };
1331
1332    let arena = {
1333        // Conversion from u64 to usize may fail on 32bit system.
1334        let arena_size = usize::try_from(arena_size).context("pmem device image too big")?;
1335        let disk_size = usize::try_from(disk_size).context("pmem device image too big")?;
1336
1337        let mut arena =
1338            MemoryMappingArena::new(arena_size).context("failed to reserve pmem memory")?;
1339        arena
1340            .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
1341            .context("failed to reserve pmem memory")?;
1342
1343        // If the disk is not a multiple of the page size, the OS will fill the remaining part
1344        // of the page with zeroes. However, the anonymous mapping added below must start on a
1345        // page boundary, so round up the size before calculating the offset of the anon region.
1346        let disk_size = round_up_to_page_size(disk_size);
1347
1348        if arena_size > disk_size {
1349            // Add an anonymous region with the same protection as the disk mapping if the arena
1350            // size was aligned.
1351            arena
1352                .add_anon_protection(disk_size, arena_size - disk_size, protection)
1353                .context("failed to reserve pmem padding")?;
1354        }
1355        arena
1356    };
1357
1358    let mapping_address = GuestAddress(
1359        resources
1360            .allocate_mmio(
1361                arena_size,
1362                Alloc::PmemDevice(index),
1363                format!("pmem_disk_image_{index}"),
1364                AllocOptions::new()
1365                // Allocate from the bottom up rather than top down to avoid exceeding PHYSMEM_END
1366                // with kaslr.
1367                // TODO: b/375506171: Find a proper fix.
1368                .top_down(false)
1369                .prefetchable(true)
1370                // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1371                // cf. https://github.com/pmem/ndctl/issues/76
1372                .align(128 * 1024 * 1024), /* 128 MiB */
1373            )
1374            .context("failed to allocate memory for pmem device")?,
1375    );
1376
1377    let mem_slot = MemSlotConfig::MemSlot {
1378        idx: vm
1379            .add_memory_region(
1380                mapping_address,
1381                Box::new(arena),
1382                /* read_only = */ pmem.ro,
1383                /* log_dirty_pages = */ false,
1384                MemCacheType::CacheCoherent,
1385            )
1386            .context("failed to add pmem device memory")?,
1387    };
1388
1389    let dev = virtio::Pmem::new(
1390        virtio::base_features(protection_type),
1391        PmemConfig {
1392            disk_image: Some(fd),
1393            mapping_address,
1394            mem_slot,
1395            mapping_size: arena_size,
1396            pmem_device_tube,
1397            swap_interval: pmem.swap_interval,
1398            mapping_writable: !pmem.ro,
1399        },
1400    )
1401    .context("failed to create pmem device")?;
1402
1403    Ok(VirtioDeviceStub {
1404        dev: Box::new(dev) as Box<dyn VirtioDevice>,
1405        jail: simple_jail(jail_config, "pmem_device")?,
1406    })
1407}
1408
1409pub fn create_pmem_ext2_device(
1410    protection_type: ProtectionType,
1411    jail_config: Option<&JailConfig>,
1412    resources: &mut SystemAllocator,
1413    opts: &PmemExt2Option,
1414    index: usize,
1415    vm_memory_client: VmMemoryClient,
1416    pmem_device_tube: Tube,
1417    worker_process_pids: &mut BTreeSet<Pid>,
1418) -> DeviceResult {
1419    let mapping_size = opts.size as u64;
1420    let builder = ext2::Builder {
1421        inodes_per_group: opts.inodes_per_group,
1422        blocks_per_group: opts.blocks_per_group,
1423        size: mapping_size as u32,
1424        ..Default::default()
1425    };
1426
1427    let max_open_files = base::linux::max_open_files()
1428        .context("failed to get max number of open files")?
1429        .rlim_max;
1430    let mapping_address = GuestAddress(
1431        resources
1432            .allocate_mmio(
1433                mapping_size,
1434                Alloc::PmemDevice(index),
1435                format!("pmem_ext2_image_{index}"),
1436                AllocOptions::new()
1437                .top_down(true)
1438                .prefetchable(true)
1439                // 2MB alignment for DAX
1440                // cf. https://docs.pmem.io/persistent-memory/getting-started-guide/creating-development-environments/linux-environments/advanced-topics/i-o-alignment-considerations#verifying-io-alignment
1441                .align(2 * 1024 * 1024),
1442            )
1443            .context("failed to allocate memory for pmem device")?,
1444    );
1445
1446    let (mkfs_tube, mkfs_device_tube) = Tube::pair().context("failed to create tube")?;
1447
1448    let ext2_proc_pid = crate::crosvm::sys::linux::ext2::launch(
1449        mapping_address,
1450        vm_memory_client,
1451        mkfs_tube,
1452        &opts.path,
1453        &opts.ugid,
1454        (&opts.uid_map, &opts.gid_map),
1455        builder,
1456        jail_config,
1457    )
1458    .context("failed to spawn mkfs process")?;
1459
1460    worker_process_pids.insert(ext2_proc_pid);
1461
1462    let dev = virtio::Pmem::new(
1463        virtio::base_features(protection_type),
1464        PmemConfig {
1465            disk_image: None,
1466            mapping_address,
1467            mem_slot: MemSlotConfig::LazyInit {
1468                tube: mkfs_device_tube,
1469            },
1470            mapping_size,
1471            pmem_device_tube,
1472            swap_interval: None,
1473            mapping_writable: false,
1474        },
1475    )
1476    .context("failed to create pmem device")?;
1477
1478    let j = if let Some(jail_config) = jail_config {
1479        let mut config = SandboxConfig::new(jail_config, "pmem_device");
1480        config.limit_caps = false;
1481        create_sandbox_minijail(&opts.path, max_open_files, &config)?
1482    } else {
1483        create_base_minijail(&opts.path, max_open_files)?
1484    };
1485    Ok(VirtioDeviceStub {
1486        dev: Box::new(dev) as Box<dyn VirtioDevice>,
1487        jail: Some(j),
1488    })
1489}
1490
1491pub fn create_anonymous_file<P: AsRef<Path>>(path: P, size: u64) -> Result<File> {
1492    let file_name = path
1493        .as_ref()
1494        .to_str()
1495        .ok_or_else(|| Error::new(libc::EINVAL))?;
1496    let mut shm = SharedMemory::new(file_name, size)?;
1497    let mut seals = MemfdSeals::new();
1498
1499    seals.set_shrink_seal();
1500    seals.set_grow_seal();
1501    seals.set_seal_seal();
1502    shm.add_seals(seals)?;
1503
1504    Ok(shm.descriptor.into())
1505}
1506
1507pub fn create_iommu_device(
1508    protection_type: ProtectionType,
1509    jail_config: Option<&JailConfig>,
1510    iova_max_addr: u64,
1511    endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
1512    hp_endpoints_ranges: Vec<RangeInclusive<u32>>,
1513    translate_response_senders: Option<BTreeMap<u32, Tube>>,
1514    translate_request_rx: Option<Tube>,
1515    iommu_device_tube: Tube,
1516) -> DeviceResult {
1517    let dev = virtio::Iommu::new(
1518        virtio::base_features(protection_type),
1519        endpoints,
1520        iova_max_addr,
1521        hp_endpoints_ranges,
1522        translate_response_senders,
1523        translate_request_rx,
1524        Some(iommu_device_tube),
1525    )
1526    .context("failed to create IOMMU device")?;
1527
1528    Ok(VirtioDeviceStub {
1529        dev: Box::new(dev),
1530        jail: simple_jail(jail_config, "iommu_device")?,
1531    })
1532}
1533
1534fn add_bind_mounts(param: &SerialParameters, jail: &mut Minijail) -> Result<(), minijail::Error> {
1535    if let Some(path) = &param.path {
1536        if let SerialType::SystemSerialType = param.type_ {
1537            if let Some(parent) = path.as_path().parent() {
1538                if parent.exists() {
1539                    info!("Bind mounting dir {}", parent.display());
1540                    jail.mount_bind(parent, parent, true)?;
1541                }
1542            }
1543        }
1544    }
1545    Ok(())
1546}
1547
1548/// For creating console virtio devices.
1549impl VirtioDeviceBuilder for &SerialParameters {
1550    const NAME: &'static str = "serial";
1551
1552    fn create_virtio_device(
1553        self,
1554        protection_type: ProtectionType,
1555    ) -> anyhow::Result<Box<dyn VirtioDevice>> {
1556        let mut keep_rds = Vec::new();
1557        let evt = Event::new().context("failed to create event")?;
1558
1559        Ok(Box::new(
1560            self.create_serial_device::<Console>(protection_type, &evt, &mut keep_rds)
1561                .context("failed to create console device")?,
1562        ))
1563    }
1564
1565    fn create_vhost_user_device(
1566        self,
1567        keep_rds: &mut Vec<RawDescriptor>,
1568    ) -> anyhow::Result<Box<dyn VhostUserDeviceBuilder>> {
1569        Ok(Box::new(
1570            virtio::vhost_user_backend::create_vu_console_device(self, keep_rds)?,
1571        ))
1572    }
1573
1574    fn create_jail(
1575        &self,
1576        jail_config: Option<&JailConfig>,
1577        virtio_transport: VirtioDeviceType,
1578    ) -> anyhow::Result<Option<Minijail>> {
1579        if let Some(jail_config) = jail_config {
1580            let policy = virtio_transport.seccomp_policy_file("serial");
1581            let mut config = SandboxConfig::new(jail_config, &policy);
1582            config.bind_mounts = true;
1583            let mut jail =
1584                create_sandbox_minijail(&jail_config.pivot_root, MAX_OPEN_FILES_DEFAULT, &config)?;
1585            add_bind_mounts(self, &mut jail)
1586                .context("failed to add bind mounts for console device")?;
1587            Ok(Some(jail))
1588        } else {
1589            Ok(None)
1590        }
1591    }
1592}
1593
/// Creates the virtio sound device for `path`.
///
/// The device is placed in the `vios_audio_device` simple jail when `jail_config` is given.
#[cfg(feature = "audio")]
pub fn create_sound_device(
    path: &Path,
    protection_type: ProtectionType,
    jail_config: Option<&JailConfig>,
) -> DeviceResult {
    let features = virtio::base_features(protection_type);
    let sound_dev = virtio::new_sound(path, features).context("failed to create sound device")?;

    let jail = simple_jail(jail_config, "vios_audio_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(sound_dev),
        jail,
    })
}
1608
/// A VFIO device as returned by `create_vfio_device`, tagged by its device type.
// NOTE(review): the lint is silenced instead of boxing a variant, presumably because the two
// payloads differ a lot in size — confirm before changing.
#[allow(clippy::large_enum_variant)]
pub enum VfioDeviceVariant {
    /// A VFIO PCI device.
    Pci(VfioPciDevice),
    /// A VFIO platform device.
    Platform(VfioPlatformDevice),
}
1614
1615pub fn create_vfio_device(
1616    jail_config: Option<&JailConfig>,
1617    vm: &impl Vm,
1618    resources: &mut SystemAllocator,
1619    add_control_tube: &mut impl FnMut(AnyControlTube),
1620    vfio_path: &Path,
1621    hotplug: bool,
1622    hotplug_bus: Option<u8>,
1623    guest_address: Option<PciAddress>,
1624    coiommu_endpoints: Option<&mut Vec<u16>>,
1625    iommu_dev: IommuDevType,
1626    dt_symbol: Option<String>,
1627    vfio_container_manager: &mut VfioContainerManager,
1628) -> DeviceResult<(VfioDeviceVariant, Option<Minijail>, Option<VfioWrapper>)> {
1629    let vfio_container = vfio_container_manager
1630        .get_container(iommu_dev, Some(vfio_path))
1631        .context("failed to get vfio container")?;
1632
1633    let (vfio_host_tube_mem, vfio_device_tube_mem) =
1634        Tube::pair().context("failed to create tube")?;
1635    add_control_tube(
1636        VmMemoryTube {
1637            tube: vfio_host_tube_mem,
1638            expose_with_viommu: false,
1639        }
1640        .into(),
1641    );
1642
1643    let (vfio_host_tube_vm, vfio_device_tube_vm) = Tube::pair().context("failed to create tube")?;
1644    add_control_tube(TaggedControlTube::Vm(vfio_host_tube_vm).into());
1645
1646    let vfio_device =
1647        VfioDevice::new_passthrough(&vfio_path, vm, vfio_container.clone(), iommu_dev, dt_symbol)
1648            .context("failed to create vfio device")?;
1649
1650    match vfio_device.device_type() {
1651        VfioDeviceType::Pci => {
1652            let (vfio_host_tube_msi, vfio_device_tube_msi) =
1653                Tube::pair().context("failed to create tube")?;
1654            add_control_tube(AnyControlTube::IrqTube(vfio_host_tube_msi));
1655
1656            let (vfio_host_tube_msix, vfio_device_tube_msix) =
1657                Tube::pair().context("failed to create tube")?;
1658            add_control_tube(AnyControlTube::IrqTube(vfio_host_tube_msix));
1659
1660            let mut vfio_pci_device = VfioPciDevice::new(
1661                vfio_path,
1662                vfio_device,
1663                hotplug,
1664                hotplug_bus,
1665                guest_address,
1666                vfio_device_tube_msi,
1667                vfio_device_tube_msix,
1668                VmMemoryClient::new(vfio_device_tube_mem),
1669                vfio_device_tube_vm,
1670            )?;
1671            // early reservation for pass-through PCI devices.
1672            let endpoint_addr = vfio_pci_device
1673                .allocate_address(resources)
1674                .context("failed to allocate resources early for vfio pci dev")?;
1675
1676            let viommu_mapper = match iommu_dev {
1677                IommuDevType::NoIommu | IommuDevType::PkvmPviommu => None,
1678                IommuDevType::VirtioIommu => {
1679                    Some(VfioWrapper::new(vfio_container, vm.get_memory().clone()))
1680                }
1681                IommuDevType::CoIommu => {
1682                    if let Some(endpoints) = coiommu_endpoints {
1683                        endpoints.push(endpoint_addr.to_u32() as u16);
1684                    } else {
1685                        bail!("Missed coiommu_endpoints vector to store the endpoint addr");
1686                    }
1687                    None
1688                }
1689            };
1690
1691            if hotplug {
1692                Ok((VfioDeviceVariant::Pci(vfio_pci_device), None, viommu_mapper))
1693            } else {
1694                Ok((
1695                    VfioDeviceVariant::Pci(vfio_pci_device),
1696                    simple_jail(jail_config, "vfio_device")?,
1697                    viommu_mapper,
1698                ))
1699            }
1700        }
1701        VfioDeviceType::Platform => {
1702            if guest_address.is_some() {
1703                bail!("guest-address is not supported for VFIO platform devices");
1704            }
1705
1706            if hotplug {
1707                bail!("hotplug is not supported for VFIO platform devices");
1708            }
1709
1710            let vfio_plat_dev =
1711                VfioPlatformDevice::new(vfio_device, VmMemoryClient::new(vfio_device_tube_mem));
1712
1713            Ok((
1714                VfioDeviceVariant::Platform(vfio_plat_dev),
1715                simple_jail(jail_config, "vfio_platform_device")?,
1716                None,
1717            ))
1718        }
1719    }
1720}
1721
1722/// Setup for devices with virtio-iommu
1723pub fn setup_virtio_access_platform(
1724    resources: &mut SystemAllocator,
1725    iommu_attached_endpoints: &mut BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
1726    devices: &mut [(Box<dyn BusDeviceObj>, Option<Minijail>)],
1727) -> DeviceResult<(Option<BTreeMap<u32, Tube>>, Option<Tube>)> {
1728    let mut translate_response_senders: Option<
1729        BTreeMap<
1730            u32, // endpoint id
1731            Tube,
1732        >,
1733    > = None;
1734    let mut tube_pair: Option<(Tube, Tube)> = None;
1735
1736    for dev in devices.iter_mut() {
1737        if let Some(pci_dev) = dev.0.as_pci_device_mut() {
1738            if pci_dev.supports_iommu() {
1739                let endpoint_id = pci_dev
1740                    .allocate_address(resources)
1741                    .context("failed to allocate resources for pci dev")?
1742                    .to_u32();
1743                let mapper: Arc<Mutex<Box<dyn MemoryMapperTrait>>> =
1744                    Arc::new(Mutex::new(Box::new(BasicMemoryMapper::new(u64::MAX))));
1745                let (request_tx, _request_rx) =
1746                    tube_pair.get_or_insert_with(|| Tube::pair().unwrap());
1747                let CreateIpcMapperRet {
1748                    mapper: ipc_mapper,
1749                    response_tx,
1750                } = create_ipc_mapper(
1751                    endpoint_id,
1752                    #[allow(deprecated)]
1753                    request_tx.try_clone()?,
1754                );
1755                translate_response_senders
1756                    .get_or_insert_with(BTreeMap::new)
1757                    .insert(endpoint_id, response_tx);
1758                iommu_attached_endpoints.insert(endpoint_id, mapper);
1759                pci_dev.set_iommu(ipc_mapper)?;
1760            }
1761        }
1762    }
1763
1764    Ok((
1765        translate_response_senders,
1766        tube_pair.map(|(_request_tx, request_rx)| request_rx),
1767    ))
1768}