1#![deny(missing_docs)]
6#![allow(dead_code)]
7
8use std::path::Path;
9use std::str;
10use std::sync::LazyLock;
11
12use anyhow::bail;
13use anyhow::Context;
14use anyhow::Result;
15#[cfg(feature = "seccomp_trace")]
16use base::debug;
17use base::getegid;
18use base::geteuid;
19#[cfg(feature = "seccomp_trace")]
20use base::warn;
21use libc::c_ulong;
22use minijail::Minijail;
23#[cfg(feature = "seccomp_trace")]
24use static_assertions::const_assert;
25#[cfg(feature = "seccomp_trace")]
26use zerocopy::Immutable;
27#[cfg(feature = "seccomp_trace")]
28use zerocopy::IntoBytes;
29
30use crate::config::JailConfig;
31
/// Seccomp BPF programs compiled into the binary, keyed by policy name.
///
/// The map expression is pulled in with `include!` from `$OUT_DIR/bpf_includes.in`
/// (presumably generated by the build script — confirm) and is evaluated lazily on first access.
static EMBEDDED_BPFS: LazyLock<std::collections::HashMap<&str, Vec<u8>>> =
    LazyLock::new(|| include!(concat!(env!("OUT_DIR"), "/bpf_includes.in")));

/// Open file descriptor limit (`RLIMIT_NOFILE`) that `simple_jail` applies to its jails.
pub const MAX_OPEN_FILES_DEFAULT: u64 = 4096;
/// Open file descriptor limit (`RLIMIT_NOFILE`) that `create_gpu_minijail` applies.
const MAX_OPEN_FILES_FOR_GPU: u64 = 32768;
/// Open file descriptor limit, presumably meant for the jail warden process; it is not
/// referenced in this part of the file — confirm against its callers.
pub const MAX_OPEN_FILES_FOR_JAIL_WARDEN: u64 = 65536;
45
/// Selects the user a sandboxed process runs as; interpreted by `create_sandbox_minijail`.
pub enum RunAsUser {
    /// No explicit choice. The current user is mapped into the jail only when bind mounts are
    /// enabled and no explicit `SandboxConfig::ugid_map` is set.
    Unspecified,
    /// Map the current effective uid/gid to the same ids inside the jail and run as them.
    CurrentUser,
    /// Map the current effective uid/gid to id 0 inside the jail.
    Root,
    /// Run as the given `(uid, gid)`. An id equal to 0 is skipped: no `change_uid` /
    /// `change_gid` call is made for it.
    Specified(u32, u32),
}
58
/// Options controlling the sandbox built by `create_sandbox_minijail`.
pub struct SandboxConfig<'a> {
    /// When true, the jail is configured with an empty capability mask (`use_caps(0)`).
    pub limit_caps: bool,
    // When true, the textual `.policy` file is used instead of a precompiled `.bpf` and
    // seccomp filter failures are logged. Only consulted when a policy directory is set and
    // the `seccomp_trace` feature is disabled.
    log_failures: bool,
    // Directory searched for `<policy name>.bpf` / `<policy name>.policy`. When `None`, the
    // policy embedded in the binary is used instead.
    seccomp_policy_dir: Option<&'a Path>,
    // Base name of the seccomp policy (no extension); also the key into the embedded policies.
    seccomp_policy_name: &'a str,
    /// Optional `(uid_map, gid_map)` pair passed to the jail's `uidmap` / `gidmap`.
    pub ugid_map: Option<(&'a str, &'a str)>,
    /// Optional remount mode handed to the jail's `set_remount_mode`; left untouched when `None`.
    pub remount_mode: Option<c_ulong>,
    /// Whether the jail enters a new network namespace. `SandboxConfig::new` enables this.
    pub namespace_net: bool,
    /// Whether to prepare the jail for bind mounts by mounting a tmpfs on its `/`.
    /// `SandboxConfig::new` disables this.
    pub bind_mounts: bool,
    /// Which user the jailed process runs as.
    pub run_as: RunAsUser,
}
81
82impl<'a> SandboxConfig<'a> {
83 pub fn new(jail_config: &'a JailConfig, policy: &'a str) -> Self {
85 Self {
86 limit_caps: true,
87 log_failures: jail_config.seccomp_log_failures,
88 seccomp_policy_dir: jail_config.seccomp_policy_dir.as_ref().map(Path::new),
89 seccomp_policy_name: policy,
90 ugid_map: None,
91 remount_mode: None,
92 namespace_net: true,
93 bind_mounts: false,
94 run_as: RunAsUser::Unspecified,
95 }
96 }
97}
98
/// Wrapper around a [`Minijail`] that calls `kill()` on the wrapped jail when dropped.
pub struct ScopedMinijail(pub Minijail);

impl Drop for ScopedMinijail {
    fn drop(&mut self) {
        // Best effort: `drop` cannot report failures, so the result of `kill()` is
        // deliberately discarded.
        let _ = self.0.kill();
    }
}
107
108#[allow(clippy::unnecessary_cast)]
118pub fn create_base_minijail(root: &Path, max_open_files: u64) -> Result<Minijail> {
119 if !root.is_dir() {
121 bail!("{:?} is not a directory, cannot create jail", root);
122 }
123 if !root.is_absolute() {
125 bail!("{:?} is not absolute path", root);
126 }
127
128 let mut jail = Minijail::new().context("failed to jail device")?;
129
130 if root != Path::new("/") {
132 jail.namespace_vfs();
134 jail.enter_pivot_root(root)
135 .context("failed to pivot root device")?;
136 }
137
138 jail.set_rlimit(libc::RLIMIT_NOFILE as i32, max_open_files, max_open_files)
139 .context("error setting max open files")?;
140
141 Ok(jail)
142}
143
144#[allow(clippy::unnecessary_cast)]
161pub fn create_base_minijail_without_pivot_root(
162 root: &Path,
163 max_open_files: u64,
164) -> Result<Minijail> {
165 if !root.is_dir() {
167 bail!("{:?} is not a directory, cannot create jail", root);
168 }
169 if !root.is_absolute() {
170 bail!("{:?} is not absolute path", root);
171 }
172
173 let mut jail = Minijail::new().context("failed to jail device")?;
174 jail.set_rlimit(libc::RLIMIT_NOFILE as i32, max_open_files, max_open_files)
175 .context("error setting max open files")?;
176
177 Ok(jail)
178}
179
180pub fn create_sandbox_minijail(
188 root: &Path,
189 max_open_files: u64,
190 config: &SandboxConfig,
191) -> Result<Minijail> {
192 let mut jail = create_base_minijail(root, max_open_files)?;
193
194 jail.namespace_pids();
195 jail.namespace_user();
196 jail.namespace_user_disable_setgroups();
197 if config.limit_caps {
198 jail.use_caps(0);
200 }
201 match config.run_as {
202 RunAsUser::Unspecified => {
203 if config.bind_mounts && config.ugid_map.is_none() {
204 add_current_user_to_jail(&mut jail)?;
206 }
207 }
208 RunAsUser::CurrentUser => {
209 add_current_user_to_jail(&mut jail)?;
210 }
211 RunAsUser::Root => {
212 let crosvm_uid = geteuid();
214 let crosvm_gid = getegid();
215 jail.uidmap(&format!("0 {crosvm_uid} 1"))
216 .context("error setting UID map")?;
217 jail.gidmap(&format!("0 {crosvm_gid} 1"))
218 .context("error setting GID map")?;
219 }
220 RunAsUser::Specified(uid, gid) => {
221 if uid != 0 {
222 jail.change_uid(uid)
223 }
224 if gid != 0 {
225 jail.change_gid(gid)
226 }
227 }
228 }
229 if config.bind_mounts {
230 jail.mount_with_data(
234 Path::new("none"),
235 Path::new("/"),
236 "tmpfs",
237 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
238 "size=67108864",
239 )?;
240
241 #[cfg(feature = "appimage")]
242 if let Ok(appdir) = std::env::var("APPDIR") {
243 let appdir_path = Path::new(&appdir);
244 let canonical_path = appdir_path.canonicalize().with_context(|| {
245 format!("failed to canonicalize APPDIR path: {:?}", appdir_path)
246 })?;
247 if !canonical_path.starts_with("/tmp") {
248 bail!("APPDIR path {:?} is not under /tmp", canonical_path);
249 }
250 jail.mount_bind(&canonical_path, &canonical_path, false)
251 .context("failed to bind mount APPDIR into jail")?;
252 }
253 }
254 if let Some((uid_map, gid_map)) = config.ugid_map {
255 jail.uidmap(uid_map).context("error setting UID map")?;
256 jail.gidmap(gid_map).context("error setting GID map")?;
257 }
258 jail.namespace_vfs();
260
261 if config.namespace_net {
262 jail.namespace_net();
264 }
265
266 jail.no_new_privs();
268
269 #[cfg(feature = "seccomp_trace")]
270 {
271 #[repr(C)]
272 #[derive(Immutable, IntoBytes)]
273 struct sock_filter {
274 code: u16, jt: u8, jf: u8, k: u32, }
280
281 const SECCOMP_RET_TRACE: u32 = 0x7ff00000;
284 const SECCOMP_RET_LOG: u32 = 0x7ffc0000;
285 const BPF_RET: u16 = 0x06;
286 const BPF_K: u16 = 0x00;
287
288 const FILTER_RET_LOG_BLOCK: sock_filter = sock_filter {
290 code: BPF_RET | BPF_K,
291 jt: 0,
292 jf: 0,
293 k: SECCOMP_RET_LOG,
294 };
295
296 warn!("The running crosvm is compiled with seccomp_trace feature, and is striclty used for debugging purpose only. DO NOT USE IN PRODUCTION!!!");
297 debug!(
298 "seccomp_trace {{\"event\": \"minijail_create\", \"name\": \"{}\", \"jail_addr\": \"0x{:x}\"}}",
299 config.seccomp_policy_name,
300 read_jail_addr(&jail),
301 );
302 jail.parse_seccomp_bytes(FILTER_RET_LOG_BLOCK.as_bytes())
303 .unwrap();
304 }
305
306 #[cfg(not(feature = "seccomp_trace"))]
307 if let Some(seccomp_policy_dir) = config.seccomp_policy_dir {
308 let seccomp_policy_path = seccomp_policy_dir.join(config.seccomp_policy_name);
309 let bpf_policy_file = seccomp_policy_path.with_extension("bpf");
317 if bpf_policy_file.exists() && !config.log_failures {
318 jail.parse_seccomp_program(&bpf_policy_file)
319 .with_context(|| {
320 format!(
321 "failed to parse precompiled seccomp policy: {}",
322 bpf_policy_file.display()
323 )
324 })?;
325 } else {
326 jail.set_seccomp_filter_tsync();
329 if config.log_failures {
330 jail.log_seccomp_filter_failures();
331 }
332 let bpf_policy_file = seccomp_policy_path.with_extension("policy");
333 jail.parse_seccomp_filters(&bpf_policy_file)
334 .with_context(|| {
335 format!(
336 "failed to parse seccomp policy: {}",
337 bpf_policy_file.display()
338 )
339 })?;
340 }
341 } else {
342 set_embedded_bpf_program(&mut jail, config.seccomp_policy_name)?;
343 }
344
345 jail.use_seccomp_filter();
346 jail.run_as_init();
348 if let Some(mode) = config.remount_mode {
350 jail.set_remount_mode(mode);
351 }
352
353 Ok(jail)
354}
355
356pub fn simple_jail(jail_config: Option<&JailConfig>, policy: &str) -> Result<Option<Minijail>> {
360 if let Some(jail_config) = jail_config {
361 let config = SandboxConfig::new(jail_config, policy);
362 Ok(Some(create_sandbox_minijail(
363 &jail_config.pivot_root,
364 MAX_OPEN_FILES_DEFAULT,
365 &config,
366 )?))
367 } else {
368 Ok(None)
369 }
370}
371
372pub fn create_gpu_minijail(
374 root: &Path,
375 config: &SandboxConfig,
376 render_node_only: bool,
377 snapshot_scratch_directory: Option<&Path>,
378) -> Result<Minijail> {
379 let mut jail = create_sandbox_minijail(root, MAX_OPEN_FILES_FOR_GPU, config)?;
380
381 let sys_dev_char_path = Path::new("/sys/dev/char");
383 jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
384
385 let sys_cpuset_path = Path::new("/sys/fs/cgroup/cpuset");
389 if sys_cpuset_path.exists() {
390 jail.mount_bind(sys_cpuset_path, sys_cpuset_path, true)?;
391 }
392
393 let sys_devices_path = Path::new("/sys/devices");
394 jail.mount_bind(sys_devices_path, sys_devices_path, false)?;
395
396 jail_mount_bind_drm(&mut jail, render_node_only)?;
397
398 let mali0_path = Path::new("/dev/mali0");
400 if mali0_path.exists() {
401 jail.mount_bind(mali0_path, mali0_path, true)?;
402 }
403
404 let pvr_sync_path = Path::new("/dev/pvr_sync");
405 if pvr_sync_path.exists() {
406 jail.mount_bind(pvr_sync_path, pvr_sync_path, true)?;
407 }
408
409 let udmabuf_path = Path::new("/dev/udmabuf");
411 if udmabuf_path.exists() {
412 jail.mount_bind(udmabuf_path, udmabuf_path, true)?;
413 }
414
415 jail_mount_bind_if_exists(
417 &mut jail,
418 &[
419 "/usr/lib",
420 "/usr/lib64",
421 "/lib",
422 "/lib64",
423 "/usr/share/drirc.d",
424 "/usr/share/glvnd",
425 "/usr/share/libdrm",
426 "/usr/share/vulkan",
427 ],
428 )?;
429
430 mount_proc(&mut jail)?;
432
433 let perfetto_path = Path::new("/run/perfetto");
436 if perfetto_path.exists() {
437 jail.mount_bind(perfetto_path, perfetto_path, true)?;
438 }
439
440 if let Some(snapshot_scratch_directory) = snapshot_scratch_directory {
442 jail.mount_with_data(
443 Path::new("none"),
444 snapshot_scratch_directory,
445 "tmpfs",
446 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
447 "size=4294967296",
448 )?;
449 }
450
451 Ok(jail)
452}
453
454pub fn jail_mount_bind_drm(jail: &mut Minijail, render_node_only: bool) -> Result<()> {
458 if render_node_only {
459 const DRM_NUM_NODES: u32 = 63;
460 const DRM_RENDER_NODE_START: u32 = 128;
461 for offset in 0..DRM_NUM_NODES {
462 let path_str = format!("/dev/dri/renderD{}", DRM_RENDER_NODE_START + offset);
463 let drm_dri_path = Path::new(&path_str);
464 if !drm_dri_path.exists() {
465 break;
466 }
467 jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
468 }
469 } else {
470 let drm_dri_path = Path::new("/dev/dri");
471 if drm_dri_path.exists() {
472 jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
473 }
474 }
475
476 Ok(())
477}
478
479pub fn jail_mount_bind_if_exists<P: AsRef<std::ffi::OsStr>>(
483 jail: &mut Minijail,
484 dirs: &[P],
485) -> Result<()> {
486 for dir in dirs {
487 let dir_path = Path::new(dir);
488 if dir_path.exists() {
489 jail.mount_bind(dir_path, dir_path, false)?;
490 }
491 }
492
493 Ok(())
494}
495
496pub fn mount_proc(jail: &mut Minijail) -> Result<()> {
498 jail.mount(
499 Path::new("proc"),
500 Path::new("/proc"),
501 "proc",
502 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
503 )?;
504 Ok(())
505}
506
/// Reads the first `usize`-sized word of the [`Minijail`] object and returns it.
///
/// `create_sandbox_minijail` logs this value as `jail_addr` in its `seccomp_trace` debug
/// output. NOTE(review): this presumably is the address of the underlying C jail struct,
/// which relies on the layout of the `Minijail` wrapper — confirm against the minijail crate.
#[cfg(feature = "seccomp_trace")]
pub fn read_jail_addr(jail: &Minijail) -> usize {
    // Compile-time guard: `Minijail` must be at least as large as the word read below.
    const_assert!(std::mem::size_of::<Minijail>() >= std::mem::size_of::<usize>());
    // SAFETY: `jail` is a valid reference and the assertion above guarantees that reading one
    // `usize` stays within the object. NOTE(review): this also assumes `Minijail` is at least
    // `usize`-aligned — confirm.
    unsafe { *(jail as *const Minijail as *const usize) }
}
516
517fn add_current_user_to_jail(jail: &mut Minijail) -> Result<()> {
520 let crosvm_uid = geteuid();
521 let crosvm_gid = getegid();
522
523 jail.uidmap(&format!("{crosvm_uid} {crosvm_uid} 1"))
524 .context("error setting UID map")?;
525 jail.gidmap(&format!("{crosvm_gid} {crosvm_gid} 1"))
526 .context("error setting GID map")?;
527
528 if crosvm_uid != 0 {
529 jail.change_uid(crosvm_uid);
530 }
531 if crosvm_gid != 0 {
532 jail.change_gid(crosvm_gid);
533 }
534 Ok(())
535}
536
537pub fn set_embedded_bpf_program(jail: &mut Minijail, seccomp_policy_name: &str) -> Result<()> {
539 let bpf_program = EMBEDDED_BPFS.get(seccomp_policy_name).with_context(|| {
540 format!("failed to find embedded seccomp policy: {seccomp_policy_name}")
541 })?;
542 jail.parse_seccomp_bytes(bpf_program).with_context(|| {
543 format!("failed to parse embedded seccomp policy: {seccomp_policy_name}")
544 })?;
545 Ok(())
546}