1use std::cell::RefCell;
6use std::fs::File;
7use std::io::prelude::*;
8use std::process;
9use std::sync::mpsc;
10use std::sync::Arc;
11use std::sync::Barrier;
12use std::thread;
13use std::thread::JoinHandle;
14#[cfg(target_arch = "x86_64")]
15use std::time::Duration;
16
17#[cfg(target_arch = "aarch64")]
18use aarch64::AArch64 as Arch;
19use anyhow::Context;
20use anyhow::Result;
21use arch::CpuConfigArch;
22use arch::CpuSet;
23use arch::IrqChipArch;
24use arch::LinuxArch;
25use arch::VcpuArch;
26use arch::VcpuInitArch;
27use arch::VmArch;
28use base::gettid;
29use base::sched_attr;
30use base::sched_setattr;
31use base::signal::clear_signal_handler;
32use base::signal::BlockedSignal;
33use base::*;
34use devices::Bus;
35use devices::IrqChip;
36use devices::VcpuRunState;
37use hypervisor::IoOperation;
38use hypervisor::IoParams;
39use hypervisor::VcpuExit;
40use hypervisor::VcpuSignalHandle;
41use libc::c_int;
42use metrics_events::MetricEventType;
43#[cfg(target_arch = "riscv64")]
44use riscv64::Riscv64 as Arch;
45use serde::Deserialize;
46use serde::Serialize;
47#[cfg(target_arch = "x86_64")]
48use sync::Mutex;
49use vm_control::*;
50#[cfg(feature = "gdb")]
51use vm_memory::GuestMemory;
52#[cfg(target_arch = "x86_64")]
53use x86_64::X8664arch as Arch;
54
55use super::ExitState;
56#[cfg(target_arch = "x86_64")]
57use crate::crosvm::ratelimit::Ratelimit;
58
// Bit flags for `sched_attr::sched_flags`. They are defined locally rather than imported; the
// values match the Linux UAPI `SCHED_FLAG_*` definitions.

/// Flag bit 0 (`0x01`): `SCHED_FLAG_RESET_ON_FORK`.
const SCHED_FLAG_RESET_ON_FORK: u64 = 1 << 0;
/// Flag bit 3 (`0x08`): `SCHED_FLAG_KEEP_POLICY`.
const SCHED_FLAG_KEEP_POLICY: u64 = 1 << 3;
/// Flag bit 4 (`0x10`): `SCHED_FLAG_KEEP_PARAMS`.
const SCHED_FLAG_KEEP_PARAMS: u64 = 1 << 4;
/// Flag bit 5 (`0x20`): `SCHED_FLAG_UTIL_CLAMP_MIN`.
const SCHED_FLAG_UTIL_CLAMP_MIN: u64 = 1 << 5;
/// Value written to `sched_attr::sched_util_min` when boosting a vCPU thread (1024).
const SCHED_SCALE_CAPACITY: u32 = 1 << 10;
/// Union of the two "keep" flags (`0x18`).
const SCHED_FLAG_KEEP_ALL: u64 = SCHED_FLAG_KEEP_POLICY | SCHED_FLAG_KEEP_PARAMS;
66
67#[allow(clippy::unnecessary_cast)]
70pub fn set_vcpu_thread_scheduling(
71 vcpu_affinity: CpuSet,
72 core_scheduling: bool,
73 enable_per_vm_core_scheduling: bool,
74 vcpu_cgroup_tasks_file: Option<File>,
75 run_rt: bool,
76 boost_uclamp: bool,
77) -> anyhow::Result<()> {
78 if boost_uclamp {
79 let mut sched_attr = sched_attr {
80 sched_flags: SCHED_FLAG_KEEP_ALL as u64
81 | SCHED_FLAG_UTIL_CLAMP_MIN
82 | SCHED_FLAG_RESET_ON_FORK as u64,
83 sched_util_min: SCHED_SCALE_CAPACITY,
84 ..Default::default()
85 };
86
87 if let Err(e) = sched_setattr(0, &mut sched_attr, 0) {
88 warn!("Failed to boost vcpu util: {}", e);
89 }
90 }
91
92 if core_scheduling && !enable_per_vm_core_scheduling {
93 if let Err(e) = enable_core_scheduling() {
95 error!("Failed to enable core scheduling: {}", e);
96 }
97 }
98
99 if let Some(mut f) = vcpu_cgroup_tasks_file {
101 f.write_all(base::gettid().to_string().as_bytes())
102 .context("failed to write vcpu tid to cgroup tasks")?;
103 }
104
105 if !vcpu_affinity.is_empty() {
109 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
110 error!("Failed to set CPU affinity: {}", e);
111 }
112 }
113
114 if run_rt {
115 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
116 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
117 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
118 {
119 warn!("Failed to set vcpu to real time: {}", e);
120 }
121 }
122
123 Ok(())
124}
125
126pub fn runnable_vcpu(
128 cpu_id: usize,
129 vcpu_id: usize,
130 vcpu: Option<Arc<dyn VcpuArch>>,
131 vcpu_init: VcpuInitArch,
132 vm: Arc<dyn VmArch>,
133 irq_chip: &dyn IrqChipArch,
134 vcpu_count: usize,
135 cpu_config: Option<CpuConfigArch>,
136) -> Result<Arc<dyn VcpuArch>> {
137 let vcpu = match vcpu {
138 Some(v) => v,
139 None => {
140 vm.create_vcpu(vcpu_id).context("failed to create vcpu")?
143 }
144 };
145
146 irq_chip
147 .add_vcpu(cpu_id, vcpu.clone())
148 .context("failed to add vcpu to irq chip")?;
149
150 Arch::configure_vcpu(
151 &*vm,
152 vm.get_hypervisor(),
153 irq_chip,
154 &*vcpu,
155 vcpu_init,
156 cpu_id,
157 vcpu_count,
158 cpu_config,
159 )
160 .context("failed to configure vcpu")?;
161
162 Ok(vcpu)
163}
164
165thread_local!(static VCPU_THREAD: RefCell<Option<VcpuSignalHandle>> = const { RefCell::new(None) });
166
167fn set_vcpu_thread_local(vcpu: Option<&dyn VcpuArch>, signal_num: c_int) {
168 let _blocked_signal = BlockedSignal::new(signal_num);
174
175 VCPU_THREAD.with(|v| {
176 let mut vcpu_thread = v.borrow_mut();
177
178 if let Some(vcpu) = vcpu {
179 assert!(vcpu_thread.is_none());
180 *vcpu_thread = Some(vcpu.signal_handle());
181 } else {
182 *vcpu_thread = None;
183 }
184 });
185}
186
187pub fn setup_vcpu_signal_handler() -> Result<()> {
188 unsafe {
190 extern "C" fn handle_signal(_: c_int) {
191 let _result = VCPU_THREAD.try_with(|v| {
195 if let Some(vcpu_signal_handle) = &(*v.borrow()) {
196 vcpu_signal_handle.signal_immediate_exit();
197 }
198 });
199 }
200
201 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
202 .context("error registering signal handler")?;
203 }
204 Ok(())
205}
206
207pub fn remove_vcpu_signal_handler() -> Result<()> {
208 clear_signal_handler(SIGRTMIN() + 0).context("error unregistering signal handler")
209}
210
211fn vcpu_loop(
212 mut run_mode: VmRunMode,
213 cpu_id: usize,
214 vcpu: Arc<dyn VcpuArch>,
215 irq_chip: Arc<dyn IrqChipArch>,
216 run_rt: bool,
217 delay_rt: bool,
218 io_bus: Bus,
219 mmio_bus: Bus,
220 hypercall_bus: Bus,
221 from_main_tube: mpsc::Receiver<VcpuControl>,
222 #[cfg(feature = "gdb")] to_gdb_tube: Option<mpsc::Sender<VcpuDebugStatusMessage>>,
223 #[cfg(feature = "gdb")] guest_mem: GuestMemory,
224 #[cfg(target_arch = "x86_64")] bus_lock_ratelimit_ctrl: Arc<Mutex<Ratelimit>>,
225) -> ExitState {
226 let mut interrupted_by_signal = false;
227
228 loop {
229 if interrupted_by_signal || run_mode != VmRunMode::Running {
233 'state_loop: loop {
234 let msg = match from_main_tube.try_recv() {
236 Ok(m) => m,
237 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
238 break 'state_loop;
241 }
242 Err(mpsc::TryRecvError::Empty) => {
243 match from_main_tube.recv() {
245 Ok(m) => m,
246 Err(mpsc::RecvError) => {
247 error!("Failed to read from main tube in vcpu");
248 return ExitState::Crash;
249 }
250 }
251 }
252 Err(mpsc::TryRecvError::Disconnected) => {
253 error!("Failed to read from main tube in vcpu");
254 return ExitState::Crash;
255 }
256 };
257
258 let mut messages = vec![msg];
260 messages.append(&mut from_main_tube.try_iter().collect());
261
262 for msg in messages {
263 match msg {
264 VcpuControl::RunState(new_mode) => {
265 run_mode = new_mode;
266 match run_mode {
267 VmRunMode::Running => {}
268 VmRunMode::Suspending => {
269 if let Err(e) = vcpu.on_suspend() {
270 error!(
271 "failed to tell hypervisor vcpu {} is suspending: {}",
272 cpu_id, e
273 );
274 }
275 }
276 VmRunMode::Breakpoint => {}
277 VmRunMode::Exiting => return ExitState::Stop,
278 }
279 }
280 #[cfg(feature = "gdb")]
281 VcpuControl::Debug(d) => {
282 if let Err(e) = crate::crosvm::gdb::vcpu_control_debug(
283 cpu_id,
284 &*vcpu,
285 &guest_mem,
286 d,
287 to_gdb_tube.as_ref(),
288 ) {
289 error!("Failed to handle VcpuControl::Debug message: {:#}", e);
290 }
291 }
292 VcpuControl::MakeRT => {
293 if run_rt && delay_rt {
294 info!("Making vcpu {} RT\n", cpu_id);
295 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
296 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
297 .and_then(|_| {
298 set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL))
299 })
300 {
301 warn!("Failed to set vcpu to real time: {}", e);
302 }
303 }
304 }
305 VcpuControl::GetStates(response_chan) => {
306 if let Err(e) = response_chan.send(run_mode) {
307 error!("Failed to send GetState: {}", e);
308 };
309 }
310 VcpuControl::Snapshot(snapshot_writer, response_chan) => {
311 let resp = vcpu
312 .snapshot()
313 .and_then(|s| {
314 snapshot_writer
315 .write_fragment(&format!("vcpu{}", vcpu.id()), &s)
316 })
317 .with_context(|| format!("Failed to snapshot Vcpu #{}", vcpu.id()));
318 if let Err(e) = response_chan.send(resp) {
319 error!("Failed to send snapshot response: {}", e);
320 }
321 }
322 VcpuControl::Restore(req) => {
323 let resp = req
324 .snapshot_reader
325 .read_fragment(&format!("vcpu{}", vcpu.id()))
326 .and_then(|s| {
327 vcpu.restore(
328 &s,
329 #[cfg(target_arch = "x86_64")]
330 req.host_tsc_reference_moment,
331 )
332 })
333 .with_context(|| format!("Failed to restore Vcpu #{}", vcpu.id()));
334 if let Err(e) = req.result_sender.send(resp) {
335 error!("Failed to send restore response: {}", e);
336 }
337 }
338 VcpuControl::Throttle(target_us) => {
339 let start_time = std::time::Instant::now();
340
341 while start_time.elapsed().as_micros() < target_us.into() {
342 }
347 }
348 }
349 }
350 if run_mode == VmRunMode::Running {
351 break 'state_loop;
352 }
353 }
354 }
355
356 interrupted_by_signal = false;
357
358 match irq_chip.wait_until_runnable(&*vcpu) {
365 Ok(VcpuRunState::Runnable) => {}
366 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
367 Err(e) => error!(
368 "error waiting for vcpu {} to become runnable: {}",
369 cpu_id, e
370 ),
371 }
372
373 if !interrupted_by_signal {
374 match vcpu.run() {
375 Ok(VcpuExit::Io) => {
376 if let Err(e) =
377 vcpu.handle_io(&mut |IoParams { address, operation }| match operation {
378 IoOperation::Read(data) => {
379 io_bus.read(address, data);
380 }
381 IoOperation::Write(data) => {
382 io_bus.write(address, data);
383 }
384 })
385 {
386 error!("failed to handle io: {}", e)
387 }
388 }
389 Ok(VcpuExit::Mmio) => {
390 if let Err(e) =
391 vcpu.handle_mmio(&mut |IoParams { address, operation }| match operation {
392 IoOperation::Read(data) => {
393 mmio_bus.read(address, data);
394 Ok(())
395 }
396 IoOperation::Write(data) => {
397 mmio_bus.write(address, data);
398 Ok(())
399 }
400 })
401 {
402 error!("failed to handle mmio: {}", e);
403 }
404 }
405 Ok(VcpuExit::Hypercall) => {
406 if let Err(e) =
407 vcpu.handle_hypercall(&mut |params| hypercall_bus.handle_hypercall(params))
408 {
409 error!("failed to handle hypercall: {}", e);
410 }
411 }
412 Ok(VcpuExit::IoapicEoi { vector }) => {
413 if let Err(e) = irq_chip.broadcast_eoi(vector) {
414 error!(
415 "failed to broadcast eoi {} on vcpu {}: {}",
416 vector, cpu_id, e
417 );
418 }
419 }
420 Ok(VcpuExit::IrqWindowOpen) => {}
421 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
422 Ok(VcpuExit::Shutdown(reason)) => {
423 if let Err(e) = reason {
424 metrics::log_descriptor(
425 MetricEventType::VcpuShutdownError,
426 e.get_raw_error_code() as i64,
427 );
428 }
429 return ExitState::Stop;
430 }
431 Ok(VcpuExit::FailEntry {
432 hardware_entry_failure_reason,
433 }) => {
434 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
435 return ExitState::Crash;
436 }
437 Ok(VcpuExit::SystemEventShutdown) => {
438 info!("system shutdown event on vcpu {}", cpu_id);
439 return ExitState::Stop;
440 }
441 Ok(VcpuExit::SystemEventReset) => {
442 info!("system reset event");
443 return ExitState::Reset;
444 }
445 Ok(VcpuExit::SystemEventCrash) => {
446 info!("system crash event on vcpu {}", cpu_id);
447 return ExitState::GuestPanic;
448 }
449 Ok(VcpuExit::Debug) => {
450 #[cfg(feature = "gdb")]
451 if let Err(e) =
452 crate::crosvm::gdb::vcpu_exit_debug(cpu_id, to_gdb_tube.as_ref())
453 {
454 error!("Failed to handle VcpuExit::Debug: {:#}", e);
455 return ExitState::Crash;
456 }
457
458 run_mode = VmRunMode::Breakpoint;
459 }
460 #[cfg(target_arch = "x86_64")]
461 Ok(VcpuExit::BusLock) => {
462 let delay_ns: u64 = bus_lock_ratelimit_ctrl.lock().ratelimit_calculate_delay(1);
463 thread::sleep(Duration::from_nanos(delay_ns));
464 }
465 Ok(VcpuExit::Sbi {
466 extension_id: _,
467 function_id: _,
468 args: _,
469 }) => {
470 unimplemented!("Sbi exits not yet supported");
471 }
472 Ok(VcpuExit::RiscvCsr {
473 csr_num,
474 new_value,
475 write_mask,
476 ret_value: _,
477 }) => {
478 unimplemented!(
479 "csr exit! {:#x} to {:#x} mask {:#x}",
480 csr_num,
481 new_value,
482 write_mask
483 );
484 }
485
486 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
487 Err(e) => match e.errno() {
488 libc::EINTR => interrupted_by_signal = true,
489 libc::EAGAIN => {}
490 _ => {
491 error!("vcpu hit unknown error: {}", e);
492 return ExitState::Crash;
493 }
494 },
495 }
496 }
497
498 if interrupted_by_signal {
499 vcpu.set_immediate_exit(false);
500 }
501
502 if let Err(e) = irq_chip.inject_interrupts(&*vcpu) {
503 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
504 }
505 }
506}
507
508#[derive(Serialize, Deserialize)]
509pub struct VcpuPidTid {
510 pub vcpu_id: usize,
511 pub process_id: u32,
512 pub thread_id: u32,
513}
514
515pub fn run_vcpu(
516 cpu_id: usize,
517 vcpu_id: usize,
518 vcpu: Option<Arc<dyn VcpuArch>>,
519 vcpu_init: VcpuInitArch,
520 vm: Arc<dyn VmArch>,
521 irq_chip: Arc<dyn IrqChipArch>,
522 vcpu_count: usize,
523 run_rt: bool,
524 vcpu_affinity: CpuSet,
525 delay_rt: bool,
526 start_barrier: Arc<Barrier>,
527 mut io_bus: Bus,
528 mut mmio_bus: Bus,
529 mut hypercall_bus: Bus,
530 vm_evt_wrtube: SendTube,
531 from_main_tube: mpsc::Receiver<VcpuControl>,
532 #[cfg(feature = "gdb")] to_gdb_tube: Option<mpsc::Sender<VcpuDebugStatusMessage>>,
533 enable_core_scheduling: bool,
534 enable_per_vm_core_scheduling: bool,
535 cpu_config: Option<CpuConfigArch>,
536 vcpu_cgroup_tasks_file: Option<File>,
537 #[cfg(target_arch = "x86_64")] bus_lock_ratelimit_ctrl: Arc<Mutex<Ratelimit>>,
538 run_mode: VmRunMode,
539 boost_uclamp: bool,
540 vcpu_pid_tid_tube: mpsc::Sender<VcpuPidTid>,
541) -> Result<JoinHandle<()>> {
542 thread::Builder::new()
543 .name(format!("crosvm_vcpu{cpu_id}"))
544 .spawn(move || {
545 let vcpu_fn = || -> ExitState {
549 if let Err(e) = set_vcpu_thread_scheduling(
550 vcpu_affinity,
551 enable_core_scheduling,
552 enable_per_vm_core_scheduling,
553 vcpu_cgroup_tasks_file,
554 run_rt && !delay_rt,
555 boost_uclamp,
556 ) {
557 error!("vcpu thread setup failed: {:#}", e);
558 return ExitState::Stop;
559 }
560
561 if let Err(e) = vcpu_pid_tid_tube.send(VcpuPidTid {
562 vcpu_id: cpu_id,
563 process_id: process::id(),
564 thread_id: gettid() as u32,
565 }) {
566 error!("Failed to send vcpu process/thread id: {:#}", e);
567 return ExitState::Crash;
568 }
569
570 #[cfg(feature = "gdb")]
571 let guest_mem = vm.get_memory().clone();
572
573 let runnable_vcpu = runnable_vcpu(
574 cpu_id,
575 vcpu_id,
576 vcpu,
577 vcpu_init,
578 vm,
579 irq_chip.as_ref(),
580 vcpu_count,
581 cpu_config,
582 );
583
584 start_barrier.wait();
585
586 let vcpu = match runnable_vcpu {
587 Ok(v) => v,
588 Err(e) => {
589 error!("failed to start vcpu {}: {:#}", cpu_id, e);
590 return ExitState::Stop;
591 }
592 };
593
594 set_vcpu_thread_local(Some(&*vcpu), SIGRTMIN() + 0);
595
596 mmio_bus.set_access_id(cpu_id);
597 io_bus.set_access_id(cpu_id);
598 hypercall_bus.set_access_id(cpu_id);
599
600 let vcpu_exit_state = vcpu_loop(
601 run_mode,
602 cpu_id,
603 vcpu,
604 irq_chip.clone(),
605 run_rt,
606 delay_rt,
607 io_bus,
608 mmio_bus,
609 hypercall_bus,
610 from_main_tube,
611 #[cfg(feature = "gdb")]
612 to_gdb_tube,
613 #[cfg(feature = "gdb")]
614 guest_mem,
615 #[cfg(target_arch = "x86_64")]
616 bus_lock_ratelimit_ctrl,
617 );
618
619 let _ = block_signal(SIGRTMIN() + 0);
621 set_vcpu_thread_local(None, SIGRTMIN() + 0);
622
623 vcpu_exit_state
624 };
625
626 let final_event_data = match vcpu_fn() {
627 ExitState::Stop => VmEventType::Exit,
628 ExitState::Reset => VmEventType::Reset,
629 ExitState::Crash => VmEventType::Crash,
630 ExitState::GuestPanic => VmEventType::GuestPanic,
631 ExitState::WatchdogReset => VmEventType::WatchdogReset,
632 };
633 if let Err(e) = vm_evt_wrtube.send::<VmEventType>(&final_event_data) {
634 error!(
635 "failed to send final event {:?} on vcpu {}: {}",
636 final_event_data, cpu_id, e
637 )
638 }
639 })
640 .context("failed to spawn VCPU thread")
641}
642
643pub fn kick_all_vcpus(
648 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
649 irq_chip: &dyn IrqChip,
650 message: VcpuControl,
651) {
652 for (handle, tube) in vcpu_handles {
653 if let Err(e) = tube.send(message.clone()) {
654 error!("failed to send VcpuControl: {}", e);
655 }
656 let _ = handle.kill(SIGRTMIN() + 0);
657 }
658 irq_chip.kick_halted_vcpus();
659}
660
661pub fn kick_vcpu(
666 vcpu_handle: &Option<&(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)>,
667 irq_chip: &dyn IrqChip,
668 message: VcpuControl,
669) {
670 if let Some((handle, tube)) = vcpu_handle {
671 if let Err(e) = tube.send(message) {
672 error!("failed to send VcpuControl: {}", e);
673 }
674 let _ = handle.kill(SIGRTMIN() + 0);
675 }
676 irq_chip.kick_halted_vcpus();
677}