use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::fs;
use std::sync::Arc;
use std::time::Duration;

use anyhow::Context;
use base::custom_serde::serialize_arc_mutex;
use base::debug;
use base::error;
use base::warn;
use base::AsRawDescriptor;
use base::Descriptor;
use base::Error as SysError;
use base::Event;
use base::EventToken;
use base::SendTube;
use base::Timer;
use base::TimerTrait;
use base::Tube;
use base::VmEventType;
use base::WaitContext;
use base::WorkerThread;
use serde::Deserialize;
use serde::Serialize;
use snapshot::AnySnapshot;
use sync::Mutex;
use vm_control::VmResponse;

use crate::pci::CrosvmDeviceId;
use crate::BusAccessInfo;
use crate::BusDevice;
use crate::DeviceId;
use crate::IrqEdgeEvent;
use crate::Suspendable;

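// Registers of the virtual watchdog MMIO device. Each vCPU owns a
// VMWDT_REG_LEN-sized window of four 32-bit registers: the guest programs a
// clock frequency, loads a count, and enables the watchdog; it pets the
// watchdog by reloading the count before it expires.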
const VMWDT_REG_STATUS: u32 = 0x00;
const VMWDT_REG_LOAD_CNT: u32 = 0x04;
const VMWDT_REG_CURRENT_CNT: u32 = 0x08;
const VMWDT_REG_CLOCK_FREQ_HZ: u32 = 0x0C;

const VMWDT_REG_LEN: u64 = 0x10;

pub const VMWDT_DEFAULT_TIMEOUT_SEC: u32 = 10;
pub const VMWDT_DEFAULT_CLOCK_HZ: u32 = 2;

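// Zero-based index of the `guest_time` field in /proc/<pid>/task/<tid>/stat;
// proc(5) documents it as field 43 (time spent running a virtual CPU, in
// clock ticks).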
const PROCSTAT_GUEST_TIME_INDX: usize = 42;

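/// Per-vCPU watchdog state. A worker thread arms one timer per vCPU and uses
/// the vCPU thread's guest time from procfs to decide whether it has stalled.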
#[derive(Serialize)]
pub struct VmwdtPerCpu {
    // Whether the guest has enabled this vCPU's watchdog.
    is_enabled: bool,
    // The backing timer cannot be serialized; it is re-armed on wake()/restore.
    #[serde(skip_serializing)]
    timer: Timer,
    // Clock frequency programmed by the guest via VMWDT_REG_CLOCK_FREQ_HZ.
    timer_freq_hz: u64,
    // Guest-mode runtime of the vCPU thread at the last pet or expiration.
    last_guest_time_ms: i64,
    // TID of the vCPU thread being monitored.
    thread_id: u32,
    // PID of the process owning the vCPU thread.
    process_id: u32,
    // Guest time remaining before the watchdog fires.
    next_expiration_interval_ms: i64,
    // Set once the stall PPI fires; a second expiration then resets the VM.
    stall_evt_ppi_triggered: bool,
    // Repeating interval currently armed, if any, so wake() can re-arm it.
    repeating_interval: Option<Duration>,
}

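/// Subset of VmwdtPerCpu fields restored from a snapshot; the timer itself is
/// recreated and re-armed rather than serialized.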
#[derive(Deserialize)]
struct VmwdtPerCpuRestore {
    is_enabled: bool,
    timer_freq_hz: u64,
    last_guest_time_ms: i64,
    next_expiration_interval_ms: i64,
    repeating_interval: Option<Duration>,
}

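/// The vmwdt device: a per-vCPU watchdog exposed to the guest as an MMIO
/// region, backed by a worker thread that raises a stall PPI and, if the
/// guest stays unresponsive, requests a watchdog reset.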
pub struct Vmwdt {
    vm_wdts: Arc<Mutex<Vec<VmwdtPerCpu>>>,
    // When stopped, the worker thread hands the VM control tube back.
    worker_thread: Option<WorkerThread<Tube>>,
    // Tube for signaling a watchdog reset to the VM event handler.
    reset_evt_wrtube: SendTube,
    // True once the worker has been started; wake() only re-arms in that case.
    activated: bool,
    // Edge-triggered interrupt raised towards the guest on a detected stall.
    stall_evt: IrqEdgeEvent,
    // VM control tube; taken by the worker thread while it is running.
    vm_ctrl_tube: Option<Tube>,
}

#[derive(Serialize)]
struct VmwdtSnapshot {
    #[serde(serialize_with = "serialize_arc_mutex")]
    vm_wdts: Arc<Mutex<Vec<VmwdtPerCpu>>>,
    activated: bool,
}

#[derive(Deserialize)]
struct VmwdtRestore {
    vm_wdts: Vec<VmwdtPerCpuRestore>,
    activated: bool,
}

impl Vmwdt {
    pub fn new(
        cpu_count: usize,
        reset_evt_wrtube: SendTube,
        evt: IrqEdgeEvent,
        vm_ctrl_tube: Tube,
    ) -> anyhow::Result<Vmwdt> {
        let mut vec = Vec::new();
        for _ in 0..cpu_count {
            vec.push(VmwdtPerCpu {
                last_guest_time_ms: 0,
                thread_id: 0,
                process_id: 0,
                is_enabled: false,
                stall_evt_ppi_triggered: false,
                timer: Timer::new().context("failed to create Timer")?,
                timer_freq_hz: 0,
                next_expiration_interval_ms: 0,
                repeating_interval: None,
            });
        }
        let vm_wdts = Arc::new(Mutex::new(vec));

        Ok(Vmwdt {
            vm_wdts,
            worker_thread: None,
            reset_evt_wrtube,
            activated: false,
            stall_evt: evt,
            vm_ctrl_tube: Some(vm_ctrl_tube),
        })
    }

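    /// Worker loop: fetches the vCPU PID/TID map, registers every per-vCPU
    /// timer with a WaitContext, and on each expiration compares the vCPU's
    /// guest time against its budget to decide between deferring, raising the
    /// stall PPI, or requesting a VM reset. Returns the VM control tube on
    /// shutdown so it can be reused after sleep().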
    pub fn vmwdt_worker_thread(
        vm_wdts: Arc<Mutex<Vec<VmwdtPerCpu>>>,
        kill_evt: Event,
        reset_evt_wrtube: SendTube,
        stall_evt: IrqEdgeEvent,
        vm_ctrl_tube: Tube,
        worker_started_send: Option<SendTube>,
    ) -> anyhow::Result<Tube> {
        // Map each vCPU index to the PID/TID of its vCPU thread so guest time
        // can be read from procfs.
        let msg = vm_control::VmRequest::VcpuPidTid;
        vm_ctrl_tube
            .send(&msg)
            .context("failed to send request to fetch Vcpus PID and TID")?;
        let vcpus_pid_tid: BTreeMap<usize, (u32, u32)> = match vm_ctrl_tube
            .recv()
            .context("failed to receive vmwdt pids and tids")?
        {
            VmResponse::VcpuPidTidResponse { pid_tid_map } => pid_tid_map,
            _ => {
                return Err(anyhow::anyhow!(
                    "received incorrect message type when requesting vcpu pid tid map"
                ));
            }
        };
        {
            let mut vm_wdts = vm_wdts.lock();
            for (i, vmwdt) in (*vm_wdts).iter_mut().enumerate() {
                let pid_tid = vcpus_pid_tid
                    .get(&i)
                    .context("vmwdts empty, which could indicate no vcpus are initialized")?;
                vmwdt.process_id = pid_tid.0;
                vmwdt.thread_id = pid_tid.1;
            }
        }
        if let Some(worker_started_send) = worker_started_send {
            worker_started_send
                .send(&())
                .context("failed to send vmwdt worker started")?;
        }
        #[derive(EventToken)]
        enum Token {
            Kill,
            Timer(usize),
        }

        let wait_ctx: WaitContext<Token> =
            WaitContext::new().context("Failed to create wait_ctx")?;
        wait_ctx
            .add(&kill_evt, Token::Kill)
            .context("Failed to add Tokens to wait_ctx")?;

        let len = vm_wdts.lock().len();
        for clock_id in 0..len {
            let timer_fd = vm_wdts.lock()[clock_id].timer.as_raw_descriptor();
            wait_ctx
                .add(&Descriptor(timer_fd), Token::Timer(clock_id))
                .context("Failed to link FDs to Tokens")?;
        }

        loop {
            let events = wait_ctx.wait().context("Failed to wait for events")?;
            for event in events.iter().filter(|e| e.is_readable) {
                match event.token {
                    Token::Kill => {
                        return Ok(vm_ctrl_tube);
                    }
                    Token::Timer(cpu_id) => {
                        let mut wdts_locked = vm_wdts.lock();
                        let watchdog = &mut wdts_locked[cpu_id];
                        match watchdog.timer.mark_waited() {
                            // Nothing to handle on this wakeup.
                            Ok(true) => continue,
                            Ok(false) => {}
                            Err(e) => {
                                error!("error waiting for timer event on vcpu {cpu_id}: {e:#}");
                                continue;
                            }
                        }

                        let current_guest_time_ms =
                            Vmwdt::get_guest_time_ms(watchdog.process_id, watchdog.thread_id)
                                .context("get_guest_time_ms failed")?;
                        let remaining_time_ms = watchdog.next_expiration_interval_ms
                            - (current_guest_time_ms - watchdog.last_guest_time_ms);

                        if remaining_time_ms > 0 {
                            // The vCPU has not yet consumed its guest-time
                            // budget (it may have been descheduled), so defer
                            // the expiration by the remaining amount.
                            watchdog.next_expiration_interval_ms = remaining_time_ms;
                            if let Err(e) = watchdog
                                .timer
                                .reset_oneshot(Duration::from_millis(remaining_time_ms as u64))
                            {
                                error!(
                                    "failed to reset internal timer on vcpu {}: {:#}",
                                    cpu_id, e
                                );
                            }
                            watchdog.repeating_interval = None;
                        } else {
                            // A second expiration with the stall PPI still
                            // pending means the guest never serviced it;
                            // request a watchdog reset of the VM.
                            if watchdog.stall_evt_ppi_triggered {
                                if let Err(e) = reset_evt_wrtube
                                    .send::<VmEventType>(&VmEventType::WatchdogReset)
                                {
                                    error!(
                                        "failed to send reset event from vcpu {}: {}",
                                        cpu_id, e
                                    )
                                }
                            }

                            stall_evt
                                .trigger()
                                .context("Failed to trigger stall event")?;
                            watchdog.stall_evt_ppi_triggered = true;
                            watchdog.last_guest_time_ms = current_guest_time_ms;
                        }
                    }
                }
            }
        }
    }

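    /// Spawns the vmwdt worker thread, handing it the VM control tube; the
    /// tube is returned to the device when the worker is stopped.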
    fn start(&mut self, worker_started_send: Option<SendTube>) -> anyhow::Result<()> {
        let vm_wdts = self.vm_wdts.clone();
        let reset_evt_wrtube = self
            .reset_evt_wrtube
            .try_clone()
            .context("failed to clone reset event tube")?;
        let stall_event = self
            .stall_evt
            .try_clone()
            .context("failed to clone stall event")?;
        let vm_ctrl_tube = self
            .vm_ctrl_tube
            .take()
            .context("missing vm control tube")?;

        self.activated = true;
        self.worker_thread = Some(WorkerThread::start("vmwdt worker", |kill_evt| {
            Vmwdt::vmwdt_worker_thread(
                vm_wdts,
                kill_evt,
                reset_evt_wrtube,
                stall_event,
                vm_ctrl_tube,
                worker_started_send,
            )
            .expect("failed to start vmwdt worker thread")
        }));
        Ok(())
    }

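    /// Lazily starts the worker on first guest access, blocking until it has
    /// fetched the vCPU PID/TID map so later register writes see valid IDs.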
    fn ensure_started(&mut self) {
        if self.worker_thread.is_some() {
            return;
        }

        let (worker_started_send, worker_started_recv) =
            Tube::directional_pair().expect("failed to create vmwdt worker started tubes");
        self.start(Some(worker_started_send))
            .expect("failed to start Vmwdt");
        worker_started_recv
            .recv::<()>()
            .expect("failed to receive vmwdt worker started");
    }

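    /// Returns how long the given vCPU thread has run in guest mode, in
    /// milliseconds, by parsing the guest_time field of its procfs stat entry.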
    #[cfg(any(target_os = "linux", target_os = "android"))]
    pub fn get_guest_time_ms(process_id: u32, thread_id: u32) -> Result<i64, SysError> {
        let stat_path = format!("/proc/{process_id}/task/{thread_id}/stat");
        let contents = fs::read_to_string(stat_path)?;

        let gtime_ticks = contents
            .split_whitespace()
            .nth(PROCSTAT_GUEST_TIME_INDX)
            .and_then(|guest_time| guest_time.parse::<u64>().ok())
            .unwrap_or(0);

        // SAFETY: sysconf() takes no pointers and is safe to call with any
        // name; _SC_CLK_TCK is always positive.
        let ticks_per_sec = unsafe { libc::sysconf(libc::_SC_CLK_TCK) } as u64;
        Ok((gtime_ticks * 1000 / ticks_per_sec) as i64)
    }

    #[cfg(not(any(target_os = "linux", target_os = "android")))]
    pub fn get_guest_time_ms(_process_id: u32, _thread_id: u32) -> Result<i64, SysError> {
        Ok(0)
    }
}

impl BusDevice for Vmwdt {
    fn debug_label(&self) -> String {
        "Vmwdt".to_owned()
    }

    fn device_id(&self) -> DeviceId {
        CrosvmDeviceId::VmWatchdog.into()
    }

    fn read(&mut self, _offset: BusAccessInfo, _data: &mut [u8]) {}

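    // Handles guest MMIO writes: the offset selects the vCPU
    // (offset / VMWDT_REG_LEN) and the register within that vCPU's window
    // (offset % VMWDT_REG_LEN).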
    fn write(&mut self, info: BusAccessInfo, data: &[u8]) {
        let data_array = match <&[u8; 4]>::try_from(data) {
            Ok(array) => array,
            _ => {
                error!("Bad write size: {} for vmwdt", data.len());
                return;
            }
        };

        let reg_val = u32::from_ne_bytes(*data_array);
        let cpu_index: usize = (info.offset / VMWDT_REG_LEN) as usize;
        let reg_offset = (info.offset % VMWDT_REG_LEN) as u32;

        if cpu_index >= self.vm_wdts.lock().len() {
            error!("Bad write cpu_index {}", cpu_index);
            return;
        }

        match reg_offset {
            VMWDT_REG_STATUS => {
                self.ensure_started();
                let mut wdts_locked = self.vm_wdts.lock();
                let cpu_watchdog = &mut wdts_locked[cpu_index];

                if reg_val != 0 && cpu_watchdog.timer_freq_hz == 0 {
                    // Avoid a divide-by-zero below; the guest must program
                    // VMWDT_REG_CLOCK_FREQ_HZ before enabling the watchdog.
                    error!("vmwdt enabled on vcpu {} with no clock frequency", cpu_index);
                    return;
                }
                cpu_watchdog.is_enabled = reg_val != 0;

                if reg_val != 0 {
                    let interval = Duration::from_millis(1000 / cpu_watchdog.timer_freq_hz);
                    cpu_watchdog.repeating_interval = Some(interval);
                    cpu_watchdog
                        .timer
                        .reset_repeating(interval)
                        .expect("Failed to reset timer repeating interval");
                } else {
                    cpu_watchdog.repeating_interval = None;
                    cpu_watchdog
                        .timer
                        .clear()
                        .expect("Failed to clear cpu watchdog timer");
                }
            }
            VMWDT_REG_LOAD_CNT => {
                self.ensure_started();
                // Read the IDs first so the lock is not held across the
                // blocking procfs read in get_guest_time_ms().
                let (process_id, thread_id) = {
                    let mut wdts_locked = self.vm_wdts.lock();
                    let cpu_watchdog = &mut wdts_locked[cpu_index];
                    (cpu_watchdog.process_id, cpu_watchdog.thread_id)
                };
                let guest_time_ms = Vmwdt::get_guest_time_ms(process_id, thread_id)
                    .expect("get_guest_time_ms failed");

                let mut wdts_locked = self.vm_wdts.lock();
                let cpu_watchdog = &mut wdts_locked[cpu_index];
                if cpu_watchdog.timer_freq_hz == 0 {
                    error!("vmwdt count loaded on vcpu {} with no clock frequency", cpu_index);
                    return;
                }
                let next_expiration_interval_ms =
                    reg_val as u64 * 1000 / cpu_watchdog.timer_freq_hz;

                cpu_watchdog.last_guest_time_ms = guest_time_ms;
                cpu_watchdog.stall_evt_ppi_triggered = false;
                cpu_watchdog.next_expiration_interval_ms = next_expiration_interval_ms as i64;

                if cpu_watchdog.is_enabled {
                    if let Err(e) = cpu_watchdog
                        .timer
                        .reset_oneshot(Duration::from_millis(next_expiration_interval_ms))
                    {
                        error!("failed to reset one-shot timer on vcpu {}: {:#}", cpu_index, e);
                    }
                    cpu_watchdog.repeating_interval = None;
                }
            }
            VMWDT_REG_CURRENT_CNT => {
                warn!("invalid write to read-only VMWDT_REG_CURRENT_CNT register");
            }
            VMWDT_REG_CLOCK_FREQ_HZ => {
                let mut wdts_locked = self.vm_wdts.lock();
                let cpu_watchdog = &mut wdts_locked[cpu_index];

                debug!(
                    "CPU:{:x} wrote VMWDT_REG_CLOCK_FREQ_HZ {:x}",
                    cpu_index, reg_val
                );
                cpu_watchdog.timer_freq_hz = reg_val as u64;
            }
            // Only the four registers above exist in each vCPU's window.
            _ => error!("invalid write to vmwdt register offset {:#x}", reg_offset),
        }
    }
}

impl Suspendable for Vmwdt {
    fn sleep(&mut self) -> anyhow::Result<()> {
        if let Some(worker) = self.worker_thread.take() {
            // Stopping the worker returns ownership of the VM control tube.
            self.vm_ctrl_tube = Some(worker.stop());
        }
        Ok(())
    }

    fn wake(&mut self) -> anyhow::Result<()> {
        if self.activated {
            self.start(None)?;
            // Re-arm each vCPU timer with the interval it used before sleep().
            let mut vm_wdts = self.vm_wdts.lock();
            for vmwdt in vm_wdts.iter_mut() {
                if let Some(interval) = &vmwdt.repeating_interval {
                    vmwdt
                        .timer
                        .reset_repeating(*interval)
                        .context("failed to write repeating interval")?;
                } else if vmwdt.is_enabled {
                    vmwdt
                        .timer
                        .reset_oneshot(Duration::from_millis(
                            vmwdt.next_expiration_interval_ms as u64,
                        ))
                        .context("failed to write oneshot interval")?;
                }
            }
        }
        Ok(())
    }

    fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        AnySnapshot::to_any(&VmwdtSnapshot {
            vm_wdts: self.vm_wdts.clone(),
            activated: self.activated,
        })
        .context("failed to snapshot Vmwdt")
    }

    fn restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        let deser: VmwdtRestore =
            AnySnapshot::from_any(data).context("failed to deserialize Vmwdt")?;
        let mut vm_wdts = self.vm_wdts.lock();
        for (vmwdt_restore, vmwdt) in deser.vm_wdts.iter().zip(vm_wdts.iter_mut()) {
            vmwdt.is_enabled = vmwdt_restore.is_enabled;
            vmwdt.timer_freq_hz = vmwdt_restore.timer_freq_hz;
            vmwdt.last_guest_time_ms = vmwdt_restore.last_guest_time_ms;
            vmwdt.next_expiration_interval_ms = vmwdt_restore.next_expiration_interval_ms;
            vmwdt.repeating_interval = vmwdt_restore.repeating_interval;
        }
        self.activated = deser.activated;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use std::process;
    use std::thread::sleep;

    #[cfg(any(target_os = "linux", target_os = "android"))]
    use base::gettid;
    use base::poll_assert;
    use base::Tube;

    use super::*;

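    // Arbitrary MMIO base address and vCPU count used to construct test devices.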
    const AARCH64_VMWDT_ADDR: u64 = 0x3000;
    const TEST_VMWDT_CPU_NO: usize = 0x1;

    fn vmwdt_bus_address(offset: u64) -> BusAccessInfo {
        BusAccessInfo {
            offset,
            address: AARCH64_VMWDT_ADDR,
            id: 0,
        }
    }

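    // Checks that after the timer fires, the worker recomputes the remaining
    // guest-time budget and re-arms the one-shot timer.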
    #[test]
    fn test_watchdog_internal_timer() {
        let (vm_evt_wrtube, _vm_evt_rdtube) = Tube::directional_pair().unwrap();
        let (vm_ctrl_wrtube, vm_ctrl_rdtube) = Tube::pair().unwrap();
        let irq = IrqEdgeEvent::new().unwrap();
        #[cfg(any(target_os = "linux", target_os = "android"))]
        {
            // Queue the PID/TID response the worker requests on startup.
            vm_ctrl_wrtube
                .send(&VmResponse::VcpuPidTidResponse {
                    pid_tid_map: BTreeMap::from([(0, (process::id(), gettid() as u32))]),
                })
                .unwrap();
        }
        let mut device = Vmwdt::new(TEST_VMWDT_CPU_NO, vm_evt_wrtube, irq, vm_ctrl_rdtube).unwrap();

        // Configure a 10 Hz clock, load a count of 1, then enable the watchdog.
        device.write(
            vmwdt_bus_address(VMWDT_REG_CLOCK_FREQ_HZ as u64),
            &[10, 0, 0, 0],
        );
        device.write(vmwdt_bus_address(VMWDT_REG_LOAD_CNT as u64), &[1, 0, 0, 0]);
        device.write(vmwdt_bus_address(VMWDT_REG_STATUS as u64), &[1, 0, 0, 0]);
        let next_expiration_ms = {
            let mut vmwdt_locked = device.vm_wdts.lock();
            // Skew the bookkeeping so the worker takes the defer path and
            // recomputes next_expiration_interval_ms.
            vmwdt_locked[0].last_guest_time_ms = 10;
            vmwdt_locked[0].next_expiration_interval_ms
        };

        poll_assert!(10, || {
            sleep(Duration::from_millis(50));
            let vmwdt_locked = device.vm_wdts.lock();
            vmwdt_locked[0].next_expiration_interval_ms != next_expiration_ms
        });
    }

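    // Drives the watchdog to a stall: the first expiration raises the stall
    // PPI, and a second expiration with the PPI still pending sends
    // VmEventType::WatchdogReset.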
    #[test]
    fn test_watchdog_expiration() {
        let (vm_evt_wrtube, vm_evt_rdtube) = Tube::directional_pair().unwrap();
        let (vm_ctrl_wrtube, vm_ctrl_rdtube) = Tube::pair().unwrap();
        let irq = IrqEdgeEvent::new().unwrap();
        #[cfg(any(target_os = "linux", target_os = "android"))]
        {
            // Queue the PID/TID response the worker requests on startup.
            vm_ctrl_wrtube
                .send(&VmResponse::VcpuPidTidResponse {
                    pid_tid_map: BTreeMap::from([(0, (process::id(), gettid() as u32))]),
                })
                .unwrap();
        }
        let mut device = Vmwdt::new(TEST_VMWDT_CPU_NO, vm_evt_wrtube, irq, vm_ctrl_rdtube).unwrap();

        // Configure a 10 Hz clock, load a count of 1, then enable the watchdog.
        device.write(
            vmwdt_bus_address(VMWDT_REG_CLOCK_FREQ_HZ as u64),
            &[10, 0, 0, 0],
        );
        device.write(vmwdt_bus_address(VMWDT_REG_LOAD_CNT as u64), &[1, 0, 0, 0]);
        device.write(vmwdt_bus_address(VMWDT_REG_STATUS as u64), &[1, 0, 0, 0]);
        // Make it look like the vCPU burned through its guest-time budget
        // without petting, so the next expiration is treated as a stall.
        device.vm_wdts.lock()[0].last_guest_time_ms = -100;

        poll_assert!(10, || {
            sleep(Duration::from_millis(50));
            let vmwdt_locked = device.vm_wdts.lock();
            vmwdt_locked[0].stall_evt_ppi_triggered
        });

        // Stall again; with the PPI still pending this must trigger a reset.
        device.vm_wdts.lock()[0].last_guest_time_ms = -100;

        poll_assert!(10, || {
            sleep(Duration::from_millis(50));
            match vm_evt_rdtube.recv::<VmEventType>() {
                Ok(vm_event) => vm_event == VmEventType::WatchdogReset,
                Err(_e) => false,
            }
        });
    }
}