1#![deny(missing_docs)]
6#![allow(dead_code)]
7
8use std::path::Path;
9use std::str;
10use std::sync::LazyLock;
11
12use anyhow::bail;
13use anyhow::Context;
14use anyhow::Result;
15#[cfg(feature = "seccomp_trace")]
16use base::debug;
17use base::getegid;
18use base::geteuid;
19#[cfg(feature = "seccomp_trace")]
20use base::warn;
21use libc::c_ulong;
22use minijail::Minijail;
23#[cfg(feature = "seccomp_trace")]
24use static_assertions::const_assert;
25#[cfg(feature = "seccomp_trace")]
26use zerocopy::Immutable;
27#[cfg(feature = "seccomp_trace")]
28use zerocopy::IntoBytes;
29
30use crate::config::JailConfig;
31
32static EMBEDDED_BPFS: LazyLock<std::collections::HashMap<&str, Vec<u8>>> =
33 LazyLock::new(|| include!(concat!(env!("OUT_DIR"), "/bpf_includes.in")));
34
/// Open-file limit (`RLIMIT_NOFILE`) that [`simple_jail`] applies to an ordinary device jail.
pub const MAX_OPEN_FILES_DEFAULT: u64 = 4 * 1024;
/// Open-file limit that [`create_gpu_minijail`] applies to the GPU device jail.
const MAX_OPEN_FILES_FOR_GPU: u64 = 32 * 1024;
/// Open-file limit intended for the jail warden.
// NOTE(review): no user of this constant is visible in this file — confirm against callers.
pub const MAX_OPEN_FILES_FOR_JAIL_WARDEN: u64 = 64 * 1024;
45
/// Selects which user a sandboxed process runs as inside its jail.
///
/// Interpreted by [`create_sandbox_minijail`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RunAsUser {
    /// No explicit choice. The current user is mapped into the jail only when bind mounts
    /// are enabled and no explicit uid/gid map was supplied; otherwise nothing is changed.
    Unspecified,
    /// Map the current effective uid/gid to themselves inside the jail and run as that user.
    CurrentUser,
    /// Map the current effective uid/gid to id `0` inside the jail.
    Root,
    /// Switch to the given `(uid, gid)`. An id of `0` is left untouched.
    ///
    /// No uid/gid map is installed for this variant; a map is only applied when
    /// `SandboxConfig::ugid_map` is set.
    Specified(u32, u32),
}
58
59pub struct SandboxConfig<'a> {
61 pub limit_caps: bool,
63 log_failures: bool,
64 seccomp_policy_dir: Option<&'a Path>,
65 seccomp_policy_name: &'a str,
66 pub ugid_map: Option<(&'a str, &'a str)>,
68 pub remount_mode: Option<c_ulong>,
70 pub namespace_net: bool,
72 pub bind_mounts: bool,
78 pub run_as: RunAsUser,
80}
81
82impl<'a> SandboxConfig<'a> {
83 pub fn new(jail_config: &'a JailConfig, policy: &'a str) -> Self {
85 Self {
86 limit_caps: true,
87 log_failures: jail_config.seccomp_log_failures,
88 seccomp_policy_dir: jail_config.seccomp_policy_dir.as_ref().map(Path::new),
89 seccomp_policy_name: policy,
90 ugid_map: None,
91 remount_mode: None,
92 namespace_net: true,
93 bind_mounts: false,
94 run_as: RunAsUser::Unspecified,
95 }
96 }
97}
98
99pub struct ScopedMinijail(pub Minijail);
101
102impl Drop for ScopedMinijail {
103 fn drop(&mut self) {
104 let _ = self.0.kill();
105 }
106}
107
108#[allow(clippy::unnecessary_cast)]
118pub fn create_base_minijail(root: &Path, max_open_files: u64) -> Result<Minijail> {
119 if !root.is_dir() {
121 bail!("{:?} is not a directory, cannot create jail", root);
122 }
123 if !root.is_absolute() {
125 bail!("{:?} is not absolute path", root);
126 }
127
128 let mut jail = Minijail::new().context("failed to jail device")?;
129
130 if root != Path::new("/") {
132 jail.namespace_vfs();
134 jail.enter_pivot_root(root)
135 .context("failed to pivot root device")?;
136 }
137
138 jail.set_rlimit(libc::RLIMIT_NOFILE as i32, max_open_files, max_open_files)
139 .context("error setting max open files")?;
140
141 Ok(jail)
142}
143
144#[allow(clippy::unnecessary_cast)]
161pub fn create_base_minijail_without_pivot_root(
162 root: &Path,
163 max_open_files: u64,
164) -> Result<Minijail> {
165 if !root.is_dir() {
167 bail!("{:?} is not a directory, cannot create jail", root);
168 }
169 if !root.is_absolute() {
170 bail!("{:?} is not absolute path", root);
171 }
172
173 let mut jail = Minijail::new().context("failed to jail device")?;
174 jail.set_rlimit(libc::RLIMIT_NOFILE as i32, max_open_files, max_open_files)
175 .context("error setting max open files")?;
176
177 Ok(jail)
178}
179
180pub fn create_sandbox_minijail(
188 root: &Path,
189 max_open_files: u64,
190 config: &SandboxConfig,
191) -> Result<Minijail> {
192 let mut jail = create_base_minijail(root, max_open_files)?;
193
194 jail.namespace_pids();
195 jail.namespace_user();
196 jail.namespace_user_disable_setgroups();
197 if config.limit_caps {
198 jail.use_caps(0);
200 }
201 match config.run_as {
202 RunAsUser::Unspecified => {
203 if config.bind_mounts && config.ugid_map.is_none() {
204 add_current_user_to_jail(&mut jail)?;
206 }
207 }
208 RunAsUser::CurrentUser => {
209 add_current_user_to_jail(&mut jail)?;
210 }
211 RunAsUser::Root => {
212 let crosvm_uid = geteuid();
214 let crosvm_gid = getegid();
215 jail.uidmap(&format!("0 {crosvm_uid} 1"))
216 .context("error setting UID map")?;
217 jail.gidmap(&format!("0 {crosvm_gid} 1"))
218 .context("error setting GID map")?;
219 }
220 RunAsUser::Specified(uid, gid) => {
221 if uid != 0 {
222 jail.change_uid(uid)
223 }
224 if gid != 0 {
225 jail.change_gid(gid)
226 }
227 }
228 }
229 if config.bind_mounts {
230 jail.mount_with_data(
234 Path::new("none"),
235 Path::new("/"),
236 "tmpfs",
237 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
238 "size=67108864",
239 )?;
240 }
241 if let Some((uid_map, gid_map)) = config.ugid_map {
242 jail.uidmap(uid_map).context("error setting UID map")?;
243 jail.gidmap(gid_map).context("error setting GID map")?;
244 }
245 jail.namespace_vfs();
247
248 if config.namespace_net {
249 jail.namespace_net();
251 }
252
253 jail.no_new_privs();
255
256 #[cfg(feature = "seccomp_trace")]
257 {
258 #[repr(C)]
259 #[derive(Immutable, IntoBytes)]
260 struct sock_filter {
261 code: u16, jt: u8, jf: u8, k: u32, }
267
268 const SECCOMP_RET_TRACE: u32 = 0x7ff00000;
271 const SECCOMP_RET_LOG: u32 = 0x7ffc0000;
272 const BPF_RET: u16 = 0x06;
273 const BPF_K: u16 = 0x00;
274
275 const FILTER_RET_LOG_BLOCK: sock_filter = sock_filter {
277 code: BPF_RET | BPF_K,
278 jt: 0,
279 jf: 0,
280 k: SECCOMP_RET_LOG,
281 };
282
283 warn!("The running crosvm is compiled with seccomp_trace feature, and is striclty used for debugging purpose only. DO NOT USE IN PRODUCTION!!!");
284 debug!(
285 "seccomp_trace {{\"event\": \"minijail_create\", \"name\": \"{}\", \"jail_addr\": \"0x{:x}\"}}",
286 config.seccomp_policy_name,
287 read_jail_addr(&jail),
288 );
289 jail.parse_seccomp_bytes(FILTER_RET_LOG_BLOCK.as_bytes())
290 .unwrap();
291 }
292
293 #[cfg(not(feature = "seccomp_trace"))]
294 if let Some(seccomp_policy_dir) = config.seccomp_policy_dir {
295 let seccomp_policy_path = seccomp_policy_dir.join(config.seccomp_policy_name);
296 let bpf_policy_file = seccomp_policy_path.with_extension("bpf");
304 if bpf_policy_file.exists() && !config.log_failures {
305 jail.parse_seccomp_program(&bpf_policy_file)
306 .with_context(|| {
307 format!(
308 "failed to parse precompiled seccomp policy: {}",
309 bpf_policy_file.display()
310 )
311 })?;
312 } else {
313 jail.set_seccomp_filter_tsync();
316 if config.log_failures {
317 jail.log_seccomp_filter_failures();
318 }
319 let bpf_policy_file = seccomp_policy_path.with_extension("policy");
320 jail.parse_seccomp_filters(&bpf_policy_file)
321 .with_context(|| {
322 format!(
323 "failed to parse seccomp policy: {}",
324 bpf_policy_file.display()
325 )
326 })?;
327 }
328 } else {
329 set_embedded_bpf_program(&mut jail, config.seccomp_policy_name)?;
330 }
331
332 jail.use_seccomp_filter();
333 jail.run_as_init();
335 if let Some(mode) = config.remount_mode {
337 jail.set_remount_mode(mode);
338 }
339
340 Ok(jail)
341}
342
343pub fn simple_jail(jail_config: Option<&JailConfig>, policy: &str) -> Result<Option<Minijail>> {
347 if let Some(jail_config) = jail_config {
348 let config = SandboxConfig::new(jail_config, policy);
349 Ok(Some(create_sandbox_minijail(
350 &jail_config.pivot_root,
351 MAX_OPEN_FILES_DEFAULT,
352 &config,
353 )?))
354 } else {
355 Ok(None)
356 }
357}
358
359pub fn create_gpu_minijail(
361 root: &Path,
362 config: &SandboxConfig,
363 render_node_only: bool,
364 snapshot_scratch_directory: Option<&Path>,
365) -> Result<Minijail> {
366 let mut jail = create_sandbox_minijail(root, MAX_OPEN_FILES_FOR_GPU, config)?;
367
368 let sys_dev_char_path = Path::new("/sys/dev/char");
370 jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
371
372 let sys_cpuset_path = Path::new("/sys/fs/cgroup/cpuset");
376 if sys_cpuset_path.exists() {
377 jail.mount_bind(sys_cpuset_path, sys_cpuset_path, true)?;
378 }
379
380 let sys_devices_path = Path::new("/sys/devices");
381 jail.mount_bind(sys_devices_path, sys_devices_path, false)?;
382
383 jail_mount_bind_drm(&mut jail, render_node_only)?;
384
385 let mali0_path = Path::new("/dev/mali0");
387 if mali0_path.exists() {
388 jail.mount_bind(mali0_path, mali0_path, true)?;
389 }
390
391 let pvr_sync_path = Path::new("/dev/pvr_sync");
392 if pvr_sync_path.exists() {
393 jail.mount_bind(pvr_sync_path, pvr_sync_path, true)?;
394 }
395
396 let udmabuf_path = Path::new("/dev/udmabuf");
398 if udmabuf_path.exists() {
399 jail.mount_bind(udmabuf_path, udmabuf_path, true)?;
400 }
401
402 jail_mount_bind_if_exists(
404 &mut jail,
405 &[
406 "/usr/lib",
407 "/usr/lib64",
408 "/lib",
409 "/lib64",
410 "/usr/share/drirc.d",
411 "/usr/share/glvnd",
412 "/usr/share/libdrm",
413 "/usr/share/vulkan",
414 ],
415 )?;
416
417 mount_proc(&mut jail)?;
419
420 let perfetto_path = Path::new("/run/perfetto");
423 if perfetto_path.exists() {
424 jail.mount_bind(perfetto_path, perfetto_path, true)?;
425 }
426
427 if let Some(snapshot_scratch_directory) = snapshot_scratch_directory {
429 jail.mount_with_data(
430 Path::new("none"),
431 snapshot_scratch_directory,
432 "tmpfs",
433 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
434 "size=4294967296",
435 )?;
436 }
437
438 Ok(jail)
439}
440
441pub fn jail_mount_bind_drm(jail: &mut Minijail, render_node_only: bool) -> Result<()> {
445 if render_node_only {
446 const DRM_NUM_NODES: u32 = 63;
447 const DRM_RENDER_NODE_START: u32 = 128;
448 for offset in 0..DRM_NUM_NODES {
449 let path_str = format!("/dev/dri/renderD{}", DRM_RENDER_NODE_START + offset);
450 let drm_dri_path = Path::new(&path_str);
451 if !drm_dri_path.exists() {
452 break;
453 }
454 jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
455 }
456 } else {
457 let drm_dri_path = Path::new("/dev/dri");
458 if drm_dri_path.exists() {
459 jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
460 }
461 }
462
463 Ok(())
464}
465
466pub fn jail_mount_bind_if_exists<P: AsRef<std::ffi::OsStr>>(
470 jail: &mut Minijail,
471 dirs: &[P],
472) -> Result<()> {
473 for dir in dirs {
474 let dir_path = Path::new(dir);
475 if dir_path.exists() {
476 jail.mount_bind(dir_path, dir_path, false)?;
477 }
478 }
479
480 Ok(())
481}
482
483pub fn mount_proc(jail: &mut Minijail) -> Result<()> {
485 jail.mount(
486 Path::new("proc"),
487 Path::new("/proc"),
488 "proc",
489 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
490 )?;
491 Ok(())
492}
493
#[cfg(feature = "seccomp_trace")]
/// Returns the first pointer-sized word stored in `jail`, used as an identifier for the
/// jail in `seccomp_trace` log output.
pub fn read_jail_addr(jail: &Minijail) -> usize {
    // Compile-time guarantee that `Minijail` is large enough for a `usize` read.
    const_assert!(std::mem::size_of::<Minijail>() >= std::mem::size_of::<usize>());

    let first_word = (jail as *const Minijail).cast::<usize>();
    // SAFETY: `jail` is a live reference and the assertion above keeps the read within the
    // object's bounds.
    // NOTE(review): this also relies on `Minijail` being at least `usize`-aligned and on
    // its first word being the address of the underlying jail — confirm against the
    // minijail crate.
    unsafe { *first_word }
}
503
504fn add_current_user_to_jail(jail: &mut Minijail) -> Result<()> {
507 let crosvm_uid = geteuid();
508 let crosvm_gid = getegid();
509
510 jail.uidmap(&format!("{crosvm_uid} {crosvm_uid} 1"))
511 .context("error setting UID map")?;
512 jail.gidmap(&format!("{crosvm_gid} {crosvm_gid} 1"))
513 .context("error setting GID map")?;
514
515 if crosvm_uid != 0 {
516 jail.change_uid(crosvm_uid);
517 }
518 if crosvm_gid != 0 {
519 jail.change_gid(crosvm_gid);
520 }
521 Ok(())
522}
523
524pub fn set_embedded_bpf_program(jail: &mut Minijail, seccomp_policy_name: &str) -> Result<()> {
526 let bpf_program = EMBEDDED_BPFS.get(seccomp_policy_name).with_context(|| {
527 format!("failed to find embedded seccomp policy: {seccomp_policy_name}")
528 })?;
529 jail.parse_seccomp_bytes(bpf_program).with_context(|| {
530 format!("failed to parse embedded seccomp policy: {seccomp_policy_name}")
531 })?;
532 Ok(())
533}