1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#![deny(missing_docs)]
use std::fs::read_to_string;
use std::num::ParseIntError;
use std::path::Path;
use std::str::FromStr;
use std::thread::sleep;
use std::time::Duration;
use anyhow::anyhow;
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use base::linux::getpid;
use base::linux::kill;
use base::linux::Signal;
use base::Pid;
/// Stops all the crosvm device processes while the guest memory is moved to the staging memory.
///
/// While moving, we must guarantee that no one changes the guest memory contents. This supports
/// devices in sandbox mode only.
///
/// We stop all the crosvm processes instead of using one of these alternatives:
///
/// * Just stop vCPUs
///   * Devices may still be working in the child processes and write something to the guest
///     memory.
/// * Use write protection of userfaultfd
///   * UFFDIO_REGISTER_MODE_WP for shmem is WIP and not supported yet.
/// * `devices::Suspendable::sleep()`
///   * `Suspendable` is not supported by all devices yet.
///
/// Dropping the guard sends the continue signal to every process it holds.
pub struct ProcessesGuard {
// Pids of the processes this guard stops and, on drop, resumes.
pids: Vec<Pid>,
}
/// Freezes every crosvm descendant process, skipping the monitor process, using signals.
///
/// The returned [`ProcessesGuard`] resumes the stopped processes when it is dropped.
///
/// This must be called from the main process: descendants are discovered starting from the pid
/// returned by `getpid()`.
///
/// # Errors
///
/// Fails when the descendants cannot be listed or stopped, or when the set of descendants still
/// differs from the stopped set after three attempts.
pub fn freeze_child_processes(monitor_pid: Pid) -> Result<ProcessesGuard> {
    let initial = load_descendants(getpid(), monitor_pid)?;
    let mut guard = ProcessesGuard { pids: initial };

    let mut attempt = 0;
    while attempt < 3 {
        guard.stop_the_world().context("stop the world")?;

        // Re-scan after stopping: a process may have forked before it received the signal.
        let current = load_descendants(getpid(), monitor_pid)?;
        if current != guard.pids {
            guard.pids = current;
            attempt += 1;
            continue;
        }
        return Ok(guard);
    }

    // Dropping `guard` on the way out resumes whatever has been stopped so far.
    bail!("new processes forked while freezing");
}
impl ProcessesGuard {
    /// Sends SIGSTOP to every tracked process, then waits until each one is reported stopped.
    ///
    /// All signals are sent before any waiting starts. The first failure aborts the operation
    /// and is returned to the caller.
    fn stop_the_world(&self) -> Result<()> {
        for &pid in self.pids.iter() {
            // SAFETY:
            // safe because pid in pids are crosvm processes except this monitor process.
            let sent = unsafe { kill(pid, Signal::Stop as i32) };
            sent.context("failed to stop process")?;
        }

        self.pids
            .iter()
            .try_for_each(|&pid| wait_process_stopped(pid).context("wait process stopped"))
    }

    /// Sends SIGCONT to every tracked process, ignoring any failure to deliver the signal.
    fn continue_the_world(&self) {
        self.pids.iter().for_each(|&pid| {
            // SAFETY:
            // safe because pid in pids are crosvm processes except this monitor process and
            // continue signal does not have side effects.
            // ignore the result because we don't care whether it succeeds.
            let _ = unsafe { kill(pid, Signal::Continue as i32) };
        });
    }
}
impl Drop for ProcessesGuard {
    /// Resumes every process held by the guard when the guard goes out of scope.
    fn drop(&mut self) {
        let guard: &Self = self;
        guard.continue_the_world();
    }
}
/// Loads Pids of crosvm descendant processes except the monitor procesess.
fn load_descendants(current_pid: Pid, monitor_pid: Pid) -> Result<Vec<Pid>> {
// children of the current process.
let children = read_to_string(format!("/proc/{0}/task/{0}/children", current_pid))
.context("read children")?;
let children = children.trim();
// str::split() to empty string results a iterator just returning 1 empty string.
if children.is_empty() {
return Ok(Vec::new());
}
let pids: std::result::Result<Vec<i32>, ParseIntError> = children
.split(" ")
.map(i32::from_str)
// except this monitor process
.filter(|pid| match pid {
Ok(pid) => *pid != monitor_pid,
_ => true,
})
.collect();
let pids = pids.context("parse pids")?;
let mut result = Vec::new();
for pid in pids {
result.push(pid);
let pids = load_descendants(pid, monitor_pid)?;
result.extend(pids);
}
Ok(result)
}
/// Extracts the process state from the contents of `/proc/<pid>/stat`.
///
/// The file holds metadata for the process in the form `pid (comm) state ...`. The `comm` field
/// is wrapped in parentheses but may itself contain spaces and `)` characters, so the end of
/// `comm` is taken to be the LAST `)` in the text. Every field after `comm` is a single
/// character or a number, so no later `)` can appear.
///
/// Returns the first character after that `)` which is not a space, or `None` when the text has
/// no `)` or nothing other than spaces follows it.
///
/// See [proc(5)](https://man7.org/linux/man-pages/man5/proc.5.html) for the format.
fn parse_process_state(text: &str) -> Option<char> {
    // `)` is a single byte in UTF-8, so `comm_end + 1` is always a valid char boundary.
    let comm_end = text.rfind(')')?;
    // Skip the spaces between "comm" and "state"; the next character is the state.
    text[comm_end + 1..].chars().find(|c| *c != ' ')
}
/// Polls `<task_path>/stat` until the task reports the stopped state (`T`).
///
/// The state is checked up to 10 times with a 50 ms sleep after each unsuccessful check.
///
/// # Errors
///
/// Fails when the stat file cannot be read, or with "time out" when the task is not stopped
/// after the last check.
fn wait_for_task_stopped(task_path: &Path) -> Result<()> {
    let stat_path = task_path.join("stat");
    let mut remaining = 10;
    while remaining > 0 {
        let contents = read_to_string(&stat_path).context("read process status")?;
        if matches!(parse_process_state(&contents), Some('T')) {
            return Ok(());
        }
        sleep(Duration::from_millis(50));
        remaining -= 1;
    }
    Err(anyhow!("time out"))
}
/// Waits until every task listed under `/proc/<pid>/task` is stopped.
///
/// Tasks are checked one after another; the first failure is returned.
fn wait_process_stopped(pid: Pid) -> Result<()> {
    let task_dir = format!("/proc/{}/task", pid);
    let entries = std::fs::read_dir(task_dir).context("read tasks")?;
    for entry in entries {
        let task_path = entry.context("read task entry")?.path();
        wait_for_task_stopped(&task_path).context("wait for task")?;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks state extraction from `/proc/<pid>/stat`-style lines.
    #[test]
    fn parse_process_state_tests() {
        assert_eq!(parse_process_state("1234 (crosvm) T 0 0 0").unwrap(), 'T');
        assert_eq!(parse_process_state("1234 (crosvm) R 0 0 0").unwrap(), 'R');
        // more than 1 white space
        assert_eq!(
            parse_process_state("1234 (crosvm)   T 0 0 0").unwrap(),
            'T'
        );
        // no white space between comm and state
        assert_eq!(parse_process_state("1234 (crosvm)T 0 0 0").unwrap(), 'T');
        // white space in the comm
        assert_eq!(
            parse_process_state("1234 (crosvm --test) T 0 0 0").unwrap(),
            'T'
        );
        // no status
        assert!(parse_process_state("1234 (crosvm)").is_none());
        // no closing parenthesis at all
        assert!(parse_process_state("1234 crosvm T 0 0 0").is_none());
        // empty input
        assert!(parse_process_state("").is_none());
    }
}