devices/virtio/vhost_user_backend/fs/sys/
linux.rs

1// Copyright 2022 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::os::unix::process::ExitStatusExt;
6use std::path::Path;
7use std::path::PathBuf;
8
9use anyhow::bail;
10use anyhow::Context;
11use base::error;
12use base::linux::max_open_files;
13use base::sys::wait_for_pid;
14use base::AsRawDescriptor;
15use base::AsRawDescriptors;
16use base::RawDescriptor;
17use cros_async::Executor;
18use jail::create_base_minijail;
19use jail::create_base_minijail_without_pivot_root;
20use minijail::Minijail;
21
22use crate::virtio::vhost_user_backend::fs::FsBackend;
23use crate::virtio::vhost_user_backend::fs::Options;
24use crate::virtio::vhost_user_backend::BackendConnection;
25
26fn default_uidmap() -> String {
27    // SAFETY: trivially safe
28    let euid = unsafe { libc::geteuid() };
29    format!("{euid} {euid} 1")
30}
31
32fn default_gidmap() -> String {
33    // SAFETY: trivially safe
34    let egid = unsafe { libc::getegid() };
35    format!("{egid} {egid} 1")
36}
37
38#[allow(clippy::unnecessary_cast)]
39fn jail_and_fork(
40    mut keep_rds: Vec<RawDescriptor>,
41    dir_path: PathBuf,
42    uid: u32,
43    gid: u32,
44    uid_map: Option<String>,
45    gid_map: Option<String>,
46    disable_sandbox: bool,
47    pivot_root: bool,
48) -> anyhow::Result<i32> {
49    let limit = max_open_files()
50        .context("failed to get max open files")?
51        .rlim_max;
52    // Create new minijail sandbox
53    let jail = if disable_sandbox {
54        if pivot_root {
55            create_base_minijail(dir_path.as_path(), limit)
56        } else {
57            create_base_minijail_without_pivot_root(dir_path.as_path(), limit)
58        }?
59    } else {
60        let mut j: Minijail = Minijail::new()?;
61
62        j.namespace_pids();
63        j.namespace_user();
64        j.namespace_user_disable_setgroups();
65        if uid != 0 {
66            j.change_uid(uid);
67        }
68        if gid != 0 {
69            j.change_gid(gid);
70        }
71        j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;
72        j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;
73        j.run_as_init();
74
75        j.namespace_vfs();
76        j.namespace_net();
77        j.no_new_privs();
78
79        // Only pivot_root if we are not re-using the current root directory.
80        if dir_path != Path::new("/") {
81            // It's safe to call `namespace_vfs` multiple times.
82            j.namespace_vfs();
83            j.enter_pivot_root(&dir_path)?;
84        }
85        j.set_remount_mode(libc::MS_SLAVE);
86
87        j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;
88        // vvu locks around 512k memory. Just give 1M.
89        j.set_rlimit(libc::RLIMIT_MEMLOCK as i32, 1 << 20, 1 << 20)?;
90        #[cfg(not(feature = "seccomp_trace"))]
91        jail::set_embedded_bpf_program(&mut j, "fs_device_vhost_user")?;
92        j.use_seccomp_filter();
93        j
94    };
95
96    // Make sure there are no duplicates in keep_rds
97    keep_rds.sort_unstable();
98    keep_rds.dedup();
99
100    // fork on the jail here
101    // SAFETY: trivially safe
102    let pid = unsafe { jail.fork(Some(&keep_rds))? };
103
104    if pid > 0 {
105        // Current FS driver jail does not use seccomp and jail_and_fork() does not have other
106        // users, so we do nothing here for seccomp_trace
107        // SAFETY: trivially safe
108        unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) };
109    }
110
111    if pid < 0 {
112        bail!("Fork error! {}", std::io::Error::last_os_error());
113    }
114
115    Ok(pid)
116}
117
118/// Starts a vhost-user fs device.
119/// Returns an error if the given `args` is invalid or the device fails to run.
120#[allow(unused_mut)]
121pub fn start_device(mut opts: Options) -> anyhow::Result<()> {
122    #[allow(unused_mut)]
123    let mut is_pivot_root_required = true;
124    #[cfg(feature = "fs_runtime_ugid_map")]
125    if let Some(ref mut cfg) = opts.cfg {
126        if !cfg.ugid_map.is_empty() && (!opts.disable_sandbox || !opts.skip_pivot_root) {
127            bail!("uid_gid_map can only be set with disable sandbox and skip_pivot_root option");
128        }
129
130        if cfg.unmap_guest_memory_on_fork && !opts.disable_sandbox {
131            bail!("unmap_guest_memory_on_fork requires --disable-sandbox");
132        }
133
134        if opts.skip_pivot_root {
135            is_pivot_root_required = false;
136        }
137    }
138    let ex = Executor::new().context("Failed to create executor")?;
139    let fs_device = FsBackend::new(
140        &opts.tag,
141        opts.shared_dir
142            .to_str()
143            .expect("Failed to convert opts.shared_dir to str()"),
144        opts.skip_pivot_root,
145        opts.cfg,
146    )?;
147
148    let mut keep_rds = fs_device.keep_rds.clone();
149    keep_rds.append(&mut ex.as_raw_descriptors());
150
151    let conn =
152        BackendConnection::from_opts(opts.socket.as_deref(), opts.socket_path.as_deref(), opts.fd)?;
153    keep_rds.push(conn.as_raw_descriptor());
154
155    base::syslog::push_descriptors(&mut keep_rds);
156    cros_tracing::push_descriptors!(&mut keep_rds);
157    metrics::push_descriptors(&mut keep_rds);
158    let pid = jail_and_fork(
159        keep_rds,
160        opts.shared_dir,
161        opts.uid,
162        opts.gid,
163        opts.uid_map,
164        opts.gid_map,
165        opts.disable_sandbox,
166        is_pivot_root_required,
167    )?;
168
169    match pid {
170        0 => {
171            // Child process runs the device and exits, not returns.
172            if let Err(e) = ex.run_until(conn.run_backend(fs_device, &ex)) {
173                error!("Error in vhost-user-fs device: {:#}", e);
174                std::process::exit(1);
175            }
176            std::process::exit(0);
177        }
178        pid if pid < 0 => {
179            unreachable!("fork error must have been handled in jail_and_fork()");
180        }
181        _ => {
182            let (_child_pid, status) =
183                wait_for_pid(pid, 0).context("failed to wait for child process")?;
184            if let Some(signal) = status.signal() {
185                panic!("Child process {pid} was killed by signal {signal}");
186            }
187            if let Some(exit_code) = status.code() {
188                if exit_code != 0 {
189                    bail!("Child process {pid} exited with code {exit_code}");
190                }
191            }
192        }
193    };
194    Ok(())
195}