1#[cfg(target_os = "android")]
6mod android;
7pub mod cmdline;
8pub mod config;
9mod device_helpers;
10pub(crate) mod ext2;
11#[cfg(feature = "gpu")]
12pub(crate) mod gpu;
13#[cfg(feature = "pci-hotplug")]
14pub(crate) mod jail_warden;
15#[cfg(feature = "pci-hotplug")]
16pub(crate) mod pci_hotplug_helpers;
17#[cfg(feature = "pci-hotplug")]
18pub(crate) mod pci_hotplug_manager;
19mod vcpu;
20
21#[cfg(all(feature = "pvclock", target_arch = "aarch64"))]
22use std::arch::asm;
23use std::cmp::max;
24use std::collections::BTreeMap;
25use std::collections::BTreeSet;
26#[cfg(feature = "registered_events")]
27use std::collections::HashMap;
28#[cfg(feature = "registered_events")]
29use std::collections::HashSet;
30use std::convert::TryInto;
31use std::ffi::CString;
32#[cfg(target_arch = "aarch64")]
33use std::fs::create_dir_all;
34use std::fs::File;
35use std::fs::OpenOptions;
36#[cfg(feature = "registered_events")]
37use std::hash::Hash;
38use std::io::stdin;
39use std::iter;
40use std::mem;
41#[cfg(target_arch = "x86_64")]
42use std::ops::RangeInclusive;
43use std::os::unix::process::ExitStatusExt;
44use std::path::Path;
45#[cfg(target_arch = "aarch64")]
46use std::path::PathBuf;
47#[cfg(target_arch = "aarch64")]
48use std::process;
49#[cfg(feature = "registered_events")]
50use std::rc::Rc;
51use std::sync::mpsc;
52use std::sync::Arc;
53use std::sync::Barrier;
54use std::thread::JoinHandle;
55
56#[cfg(target_arch = "aarch64")]
57use aarch64::AArch64 as Arch;
58use acpi_tables::sdt::SDT;
59use anyhow::anyhow;
60use anyhow::bail;
61use anyhow::Context;
62use anyhow::Result;
63use arch::DtbOverlay;
64use arch::IrqChipArch;
65use arch::LinuxArch;
66use arch::RunnableLinuxVm;
67use arch::VcpuAffinity;
68use arch::VcpuArch;
69use arch::VirtioDeviceStub;
70use arch::VmArch;
71use arch::VmComponents;
72use arch::VmImage;
73use arch::DEFAULT_CPU_CAPACITY;
74use argh::FromArgs;
75use base::ReadNotifier;
76#[cfg(feature = "balloon")]
77use base::UnixSeqpacket;
78use base::UnixSeqpacketListener;
79use base::UnlinkUnixSeqpacketListener;
80use base::*;
81use cros_async::Executor;
82use device_helpers::*;
83use devices::create_devices_worker_thread;
84use devices::serial_device::SerialHardware;
85#[cfg(all(feature = "pvclock", target_arch = "x86_64"))]
86use devices::tsc::get_tsc_sync_mitigations;
87use devices::vfio::VfioContainerManager;
88#[cfg(feature = "gpu")]
89use devices::virtio;
90#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
91use devices::virtio::device_constants::video::VideoDeviceType;
92#[cfg(feature = "gpu")]
93use devices::virtio::gpu::EventDevice;
94#[cfg(target_arch = "x86_64")]
95use devices::virtio::memory_mapper::MemoryMapper;
96use devices::virtio::memory_mapper::MemoryMapperTrait;
97use devices::virtio::vhost_user_backend::VhostUserConnectionTrait;
98use devices::virtio::vhost_user_backend::VhostUserListener;
99#[cfg(feature = "balloon")]
100use devices::virtio::BalloonFeatures;
101#[cfg(feature = "pci-hotplug")]
102use devices::virtio::NetParameters;
103#[cfg(feature = "pci-hotplug")]
104use devices::virtio::NetParametersMode;
105use devices::virtio::VirtioDevice;
106use devices::virtio::VirtioDeviceType;
107use devices::Bus;
108use devices::BusDeviceObj;
109use devices::BusType;
110use devices::CoIommuDev;
111#[cfg(feature = "usb")]
112use devices::DeviceProvider;
113#[cfg(target_arch = "x86_64")]
114use devices::HotPlugBus;
115#[cfg(target_arch = "x86_64")]
116use devices::HotPlugKey;
117use devices::IommuDevType;
118use devices::IrqEventIndex;
119use devices::IrqEventSource;
120#[cfg(feature = "pci-hotplug")]
121use devices::NetResourceCarrier;
122#[cfg(target_arch = "x86_64")]
123use devices::PciAddress;
124#[cfg(target_arch = "x86_64")]
125use devices::PciBridge;
126use devices::PciDevice;
127#[cfg(target_arch = "x86_64")]
128use devices::PciMmioMapper;
129#[cfg(target_arch = "x86_64")]
130use devices::PciRoot;
131#[cfg(target_arch = "x86_64")]
132use devices::PciRootCommand;
133#[cfg(target_arch = "x86_64")]
134use devices::PcieDownstreamPort;
135#[cfg(target_arch = "x86_64")]
136use devices::PcieHostPort;
137#[cfg(target_arch = "x86_64")]
138use devices::PcieRootPort;
139#[cfg(target_arch = "x86_64")]
140use devices::PcieUpstreamPort;
141use devices::PvPanicCode;
142use devices::PvPanicPciDevice;
143#[cfg(feature = "pci-hotplug")]
144use devices::ResourceCarrier;
145use devices::StubPciDevice;
146use devices::VirtioPciDevice;
147#[cfg(feature = "usb")]
148use devices::XhciController;
149#[cfg(feature = "gpu")]
150use gpu::*;
151#[cfg(target_arch = "riscv64")]
152use hypervisor::CpuConfigRiscv64;
153#[cfg(target_arch = "x86_64")]
154use hypervisor::CpuConfigX86_64;
155use hypervisor::Hypervisor;
156use hypervisor::HypervisorCap;
157use hypervisor::MemCacheType;
158use hypervisor::ProtectionType;
159use hypervisor::Vm;
160use hypervisor::VmCap;
161use jail::*;
162#[cfg(feature = "pci-hotplug")]
163use jail_warden::JailWarden;
164#[cfg(feature = "pci-hotplug")]
165use jail_warden::JailWardenImpl;
166#[cfg(feature = "pci-hotplug")]
167use jail_warden::PermissiveJailWarden;
168use libc;
169use metrics::MetricsController;
170use minijail::Minijail;
171#[cfg(feature = "pci-hotplug")]
172use pci_hotplug_manager::PciHotPlugManager;
173use resources::AddressRange;
174use resources::Alloc;
175use resources::SystemAllocator;
176#[cfg(target_arch = "riscv64")]
177use riscv64::Riscv64 as Arch;
178use rutabaga_gfx::RutabagaGralloc;
179use rutabaga_gfx::RutabagaGrallocBackendFlags;
180use smallvec::SmallVec;
181#[cfg(feature = "swap")]
182use swap::SwapController;
183use sync::Condvar;
184use sync::Mutex;
185use vm_control::api::VmMemoryClient;
186use vm_control::*;
187use vm_memory::FileBackedMappingParameters;
188use vm_memory::GuestAddress;
189use vm_memory::GuestMemory;
190use vm_memory::MemoryPolicy;
191use vm_memory::MemoryRegionOptions;
192#[cfg(target_arch = "x86_64")]
193use x86_64::X8664arch as Arch;
194
195use crate::crosvm::config::Config;
196use crate::crosvm::config::Executable;
197use crate::crosvm::config::HypervisorKind;
198use crate::crosvm::config::InputDeviceOption;
199use crate::crosvm::config::IrqChipKind;
200use crate::crosvm::config::DEFAULT_TOUCH_DEVICE_HEIGHT;
201use crate::crosvm::config::DEFAULT_TOUCH_DEVICE_WIDTH;
202#[cfg(feature = "gdb")]
203use crate::crosvm::gdb::gdb_thread;
204#[cfg(feature = "gdb")]
205use crate::crosvm::gdb::GdbStub;
206#[cfg(target_arch = "x86_64")]
207use crate::crosvm::ratelimit::Ratelimit;
208use crate::crosvm::sys::cmdline::DevicesCommand;
209use crate::crosvm::sys::config::SharedDir;
210use crate::crosvm::sys::config::SharedDirKind;
211use crate::crosvm::sys::platform::vcpu::VcpuPidTid;
212
/// Path of the KVM device node.
const KVM_PATH: &str = "/dev/kvm";
/// Path of the GenieZone device node (aarch64 builds with the `geniezone` feature).
#[cfg(all(target_arch = "aarch64", feature = "geniezone"))]
const GENIEZONE_PATH: &str = "/dev/gzvm";
/// Path of the Gunyah device node (aarch64 builds with the `gunyah` feature).
#[cfg(all(target_arch = "aarch64", feature = "gunyah"))]
const GUNYAH_PATH: &str = "/dev/gunyah";
/// Path of the Halla device node (aarch64 builds with the `halla` feature).
#[cfg(all(target_arch = "aarch64", feature = "halla"))]
const HALLA_PATH: &str = "/dev/halla";
221
222fn create_virtio_devices(
223 cfg: &Config,
224 vm: &mut impl VmArch,
225 resources: &mut SystemAllocator,
226 add_control_tube: &mut impl FnMut(AnyControlTube),
227 #[cfg_attr(not(feature = "gpu"), allow(unused_variables))] vm_evt_wrtube: &SendTube,
228 #[cfg(feature = "balloon")] balloon_inflate_tube: Option<Tube>,
229 worker_process_pids: &mut BTreeSet<Pid>,
230 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
231 #[cfg(feature = "gpu")] has_vfio_gfx_device: bool,
232 #[cfg(feature = "registered_events")] registered_evt_q: &SendTube,
233) -> DeviceResult<Vec<VirtioDeviceStub>> {
234 let mut devs = Vec::new();
235
236 #[cfg(any(feature = "gpu", feature = "video-decoder", feature = "video-encoder"))]
237 let mut resource_bridges = Vec::<Tube>::new();
238
239 if !cfg.wayland_socket_paths.is_empty() {
240 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
241 let mut wl_resource_bridge = None::<Tube>;
242
243 #[cfg(feature = "gpu")]
244 {
245 if cfg.gpu_parameters.is_some() {
246 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
247 resource_bridges.push(gpu_socket);
248 wl_resource_bridge = Some(wl_socket);
249 }
250 }
251
252 devs.push(create_wayland_device(
253 cfg.protection_type,
254 cfg.jail_config.as_ref(),
255 &cfg.wayland_socket_paths,
256 wl_resource_bridge,
257 )?);
258 }
259
260 #[cfg(all(feature = "media", feature = "video-decoder"))]
261 let media_adapter_cfg = cfg
262 .media_decoder
263 .iter()
264 .map(|config| {
265 let (video_tube, gpu_tube) =
266 Tube::pair().expect("failed to create tube for media adapter");
267 resource_bridges.push(gpu_tube);
268 (video_tube, config.backend)
269 })
270 .collect::<Vec<_>>();
271
272 #[cfg(feature = "video-decoder")]
273 let video_dec_cfg = cfg
274 .video_dec
275 .iter()
276 .map(|config| {
277 let (video_tube, gpu_tube) =
278 Tube::pair().expect("failed to create tube for video decoder");
279 resource_bridges.push(gpu_tube);
280 (video_tube, config.backend)
281 })
282 .collect::<Vec<_>>();
283
284 #[cfg(feature = "video-encoder")]
285 let video_enc_cfg = cfg
286 .video_enc
287 .iter()
288 .map(|config| {
289 let (video_tube, gpu_tube) =
290 Tube::pair().expect("failed to create tube for video encoder");
291 resource_bridges.push(gpu_tube);
292 (video_tube, config.backend)
293 })
294 .collect::<Vec<_>>();
295
296 #[cfg(feature = "gpu")]
297 {
298 if let Some(gpu_parameters) = &cfg.gpu_parameters {
299 let mut event_devices = Vec::new();
300 if cfg.display_window_mouse {
301 let display_param = if gpu_parameters.display_params.is_empty() {
302 Default::default()
303 } else {
304 gpu_parameters.display_params[0].clone()
305 };
306 let (gpu_display_w, gpu_display_h) = display_param.get_virtual_display_size();
307
308 let (event_device_socket, virtio_dev_socket) =
309 StreamChannel::pair(BlockingMode::Nonblocking, FramingMode::Byte)
310 .context("failed to create socket")?;
311 let mut multi_touch_width = gpu_display_w;
312 let mut multi_touch_height = gpu_display_h;
313 let mut multi_touch_name = None;
314 for input in &cfg.virtio_input {
315 if let InputDeviceOption::MultiTouch {
316 width,
317 height,
318 name,
319 ..
320 } = input
321 {
322 if let Some(width) = width {
323 multi_touch_width = *width;
324 }
325 if let Some(height) = height {
326 multi_touch_height = *height;
327 }
328 if let Some(name) = name {
329 multi_touch_name = Some(name.as_str());
330 }
331 break;
332 }
333 }
334 let dev = virtio::input::new_multi_touch(
335 u32::MAX,
338 virtio_dev_socket,
339 multi_touch_width,
340 multi_touch_height,
341 multi_touch_name,
342 virtio::base_features(cfg.protection_type),
343 )
344 .context("failed to set up mouse device")?;
345 devs.push(VirtioDeviceStub {
346 dev: Box::new(dev),
347 jail: simple_jail(cfg.jail_config.as_ref(), "input_device")?,
348 });
349 event_devices.push(EventDevice::touchscreen(event_device_socket));
350 }
351 if cfg.display_window_keyboard {
352 let (event_device_socket, virtio_dev_socket) =
353 StreamChannel::pair(BlockingMode::Nonblocking, FramingMode::Byte)
354 .context("failed to create socket")?;
355 let dev = virtio::input::new_keyboard(
356 u32::MAX,
359 virtio_dev_socket,
360 virtio::base_features(cfg.protection_type),
361 )
362 .context("failed to set up keyboard device")?;
363 devs.push(VirtioDeviceStub {
364 dev: Box::new(dev),
365 jail: simple_jail(cfg.jail_config.as_ref(), "input_device")?,
366 });
367 event_devices.push(EventDevice::keyboard(event_device_socket));
368 }
369
370 let (gpu_control_host_tube, gpu_control_device_tube) =
371 Tube::pair().context("failed to create gpu tube")?;
372 add_control_tube(DeviceControlTube::Gpu(gpu_control_host_tube).into());
373 devs.push(create_gpu_device(
374 cfg,
375 vm_evt_wrtube,
376 gpu_control_device_tube,
377 resource_bridges,
378 render_server_fd,
379 has_vfio_gfx_device,
380 event_devices,
381 )?);
382 }
383 }
384
385 for (_, param) in cfg
386 .serial_parameters
387 .iter()
388 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
389 {
390 let dev =
391 param.create_virtio_device_and_jail(cfg.protection_type, cfg.jail_config.as_ref())?;
392 devs.push(dev);
393 }
394
395 for disk in &cfg.disks {
396 let (disk_host_tube, disk_device_tube) = Tube::pair().context("failed to create tube")?;
397 add_control_tube(DeviceControlTube::Disk(disk_host_tube).into());
398 let disk_config = DiskConfig::new(disk, Some(disk_device_tube));
399 devs.push(
400 disk_config
401 .create_virtio_device_and_jail(cfg.protection_type, cfg.jail_config.as_ref())?,
402 );
403 }
404
405 if !cfg.scsis.is_empty() {
406 let scsi_config = ScsiConfig(&cfg.scsis);
407 devs.push(
408 scsi_config
409 .create_virtio_device_and_jail(cfg.protection_type, cfg.jail_config.as_ref())?,
410 );
411 }
412
413 for (index, pmem_disk) in cfg.pmems.iter().enumerate() {
414 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
415 add_control_tube(TaggedControlTube::VmMsync(pmem_host_tube).into());
416 devs.push(create_pmem_device(
417 cfg.protection_type,
418 cfg.jail_config.as_ref(),
419 vm,
420 resources,
421 pmem_disk,
422 index,
423 pmem_device_tube,
424 )?);
425 }
426
427 for (index, pmem_ext2) in cfg.pmem_ext2.iter().enumerate() {
428 let (pmem_ext2_host_tube, pmem_ext2_device_tube) =
431 Tube::pair().context("failed to create tube")?;
432 let vm_memory_client = VmMemoryClient::new(pmem_ext2_device_tube);
433 add_control_tube(
434 VmMemoryTube {
435 tube: pmem_ext2_host_tube,
436 expose_with_viommu: false,
437 }
438 .into(),
439 );
440 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
441 add_control_tube(TaggedControlTube::VmMsync(pmem_host_tube).into());
442 devs.push(create_pmem_ext2_device(
443 cfg.protection_type,
444 cfg.jail_config.as_ref(),
445 resources,
446 pmem_ext2,
447 index,
448 vm_memory_client,
449 pmem_device_tube,
450 worker_process_pids,
451 )?);
452 }
453
454 if cfg.rng {
455 devs.push(create_virtio_rng_device(
456 cfg.protection_type,
457 cfg.jail_config.as_ref(),
458 )?);
459 }
460
461 #[cfg(feature = "pvclock")]
462 if cfg.pvclock {
463 let (host_suspend_tube, suspend_tube) = Tube::pair().context("failed to create tube")?;
465 add_control_tube(DeviceControlTube::PvClock(host_suspend_tube).into());
466
467 let frequency: u64;
468 #[cfg(target_arch = "x86_64")]
469 {
470 let tsc_state = devices::tsc::tsc_state()?;
471 let tsc_sync_mitigations =
472 get_tsc_sync_mitigations(&tsc_state, cfg.vcpu_count.unwrap_or(1));
473 if tsc_state.core_grouping.size() > 1 {
474 warn!(
476 "Host TSCs are not in sync, applying the following mitigations: {:?}",
477 tsc_sync_mitigations
478 );
479 }
480 frequency = tsc_state.frequency;
481 }
482 #[cfg(target_arch = "aarch64")]
483 {
484 let mut x: u64;
485 unsafe {
488 asm!("mrs {x}, cntfrq_el0",
489 x = out(reg) x,
490 );
491 }
492 frequency = x;
493
494 vm.set_counter_offset(0)
498 .context("failed to set up pvclock")?;
499 }
500 let dev = create_pvclock_device(
501 cfg.protection_type,
502 cfg.jail_config.as_ref(),
503 frequency,
504 suspend_tube,
505 )?;
506 devs.push(dev);
507 info!("virtio-pvclock is enabled for this vm");
508 }
509
510 #[cfg(feature = "vtpm")]
511 {
512 if cfg.vtpm_proxy {
513 devs.push(create_vtpm_proxy_device(
514 cfg.protection_type,
515 cfg.jail_config.as_ref(),
516 )?);
517 }
518 }
519
520 let mut keyboard_idx = 0;
521 let mut mouse_idx = 0;
522 let mut rotary_idx = 0;
523 let mut switches_idx = 0;
524 let mut multi_touch_idx = 0;
525 let mut single_touch_idx = 0;
526 let mut trackpad_idx = 0;
527 let mut multi_touch_trackpad_idx = 0;
528 let mut custom_idx = 0;
529 for input in &cfg.virtio_input {
530 let input_dev = match input {
531 InputDeviceOption::Evdev { path } => create_vinput_device(
532 cfg.protection_type,
533 cfg.jail_config.as_ref(),
534 path.as_path(),
535 )?,
536 InputDeviceOption::Keyboard { path } => {
537 let dev = create_keyboard_device(
538 cfg.protection_type,
539 cfg.jail_config.as_ref(),
540 path.as_path(),
541 keyboard_idx,
542 )?;
543 keyboard_idx += 1;
544 dev
545 }
546 InputDeviceOption::Mouse { path } => {
547 let dev = create_mouse_device(
548 cfg.protection_type,
549 cfg.jail_config.as_ref(),
550 path.as_path(),
551 mouse_idx,
552 )?;
553 mouse_idx += 1;
554 dev
555 }
556 InputDeviceOption::MultiTouch {
557 path,
558 width,
559 height,
560 name,
561 } => {
562 let mut width = *width;
563 let mut height = *height;
564 if multi_touch_idx == 0 {
565 if width.is_none() {
566 width = cfg.display_input_width;
567 }
568 if height.is_none() {
569 height = cfg.display_input_height;
570 }
571 }
572 let dev = create_multi_touch_device(
573 cfg.protection_type,
574 cfg.jail_config.as_ref(),
575 path.as_path(),
576 width.unwrap_or(DEFAULT_TOUCH_DEVICE_WIDTH),
577 height.unwrap_or(DEFAULT_TOUCH_DEVICE_HEIGHT),
578 name.as_deref(),
579 multi_touch_idx,
580 )?;
581 multi_touch_idx += 1;
582 dev
583 }
584 InputDeviceOption::Rotary { path } => {
585 let dev = create_rotary_device(
586 cfg.protection_type,
587 cfg.jail_config.as_ref(),
588 path.as_path(),
589 rotary_idx,
590 )?;
591 rotary_idx += 1;
592 dev
593 }
594 InputDeviceOption::SingleTouch {
595 path,
596 width,
597 height,
598 name,
599 } => {
600 let mut width = *width;
601 let mut height = *height;
602 if single_touch_idx == 0 {
603 if width.is_none() {
604 width = cfg.display_input_width;
605 }
606 if height.is_none() {
607 height = cfg.display_input_height;
608 }
609 }
610 let dev = create_single_touch_device(
611 cfg.protection_type,
612 cfg.jail_config.as_ref(),
613 path.as_path(),
614 width.unwrap_or(DEFAULT_TOUCH_DEVICE_WIDTH),
615 height.unwrap_or(DEFAULT_TOUCH_DEVICE_HEIGHT),
616 name.as_deref(),
617 single_touch_idx,
618 )?;
619 single_touch_idx += 1;
620 dev
621 }
622 InputDeviceOption::Switches { path } => {
623 let dev = create_switches_device(
624 cfg.protection_type,
625 cfg.jail_config.as_ref(),
626 path.as_path(),
627 switches_idx,
628 )?;
629 switches_idx += 1;
630 dev
631 }
632 InputDeviceOption::Trackpad {
633 path,
634 width,
635 height,
636 name,
637 } => {
638 let dev = create_trackpad_device(
639 cfg.protection_type,
640 cfg.jail_config.as_ref(),
641 path.as_path(),
642 width.unwrap_or(DEFAULT_TOUCH_DEVICE_WIDTH),
643 height.unwrap_or(DEFAULT_TOUCH_DEVICE_HEIGHT),
644 name.as_deref(),
645 trackpad_idx,
646 )?;
647 trackpad_idx += 1;
648 dev
649 }
650 InputDeviceOption::MultiTouchTrackpad {
651 path,
652 width,
653 height,
654 name,
655 } => {
656 let dev = create_multitouch_trackpad_device(
657 cfg.protection_type,
658 cfg.jail_config.as_ref(),
659 path.as_path(),
660 width.unwrap_or(DEFAULT_TOUCH_DEVICE_WIDTH),
661 height.unwrap_or(DEFAULT_TOUCH_DEVICE_HEIGHT),
662 name.as_deref(),
663 multi_touch_trackpad_idx,
664 )?;
665 multi_touch_trackpad_idx += 1;
666 dev
667 }
668 InputDeviceOption::Custom { path, config_path } => {
669 let dev = create_custom_device(
670 cfg.protection_type,
671 cfg.jail_config.as_ref(),
672 path.as_path(),
673 custom_idx,
674 config_path.clone(),
675 )?;
676 custom_idx += 1;
677 dev
678 }
679 };
680 devs.push(input_dev);
681 }
682
683 #[cfg(feature = "balloon")]
684 if cfg.balloon {
685 let balloon_device_tube = if let Some(ref path) = cfg.balloon_control {
686 Tube::try_from(UnixSeqpacket::connect(path).with_context(|| {
687 format!(
688 "failed to connect to balloon control socket {}",
689 path.display(),
690 )
691 })?)?
692 } else {
693 let (host, device) = Tube::pair().context("failed to create tube")?;
696 add_control_tube(DeviceControlTube::Balloon(host).into());
697 device
698 };
699
700 let balloon_features = (cfg.balloon_page_reporting as u64)
701 << BalloonFeatures::PageReporting as u64
702 | (cfg.balloon_ws_reporting as u64) << BalloonFeatures::WSReporting as u64;
703
704 let init_balloon_size = if let Some(init_memory) = cfg.init_memory {
705 let init_memory_bytes = init_memory.saturating_mul(1024 * 1024);
706 let total_memory_bytes = vm.get_memory().memory_size();
707
708 if init_memory_bytes > total_memory_bytes {
709 bail!(
710 "initial memory {} cannot be greater than total memory {}",
711 init_memory,
712 total_memory_bytes / (1024 * 1024),
713 );
714 }
715
716 total_memory_bytes - init_memory_bytes
718 } else {
719 0
721 };
722
723 let (dynamic_mapping_host_tube, dynamic_mapping_device_tube) =
726 Tube::pair().context("failed to create tube")?;
727 add_control_tube(
728 VmMemoryTube {
729 tube: dynamic_mapping_host_tube,
730 expose_with_viommu: false,
731 }
732 .into(),
733 );
734
735 devs.push(create_balloon_device(
736 cfg.protection_type,
737 cfg.jail_config.as_ref(),
738 balloon_device_tube,
739 balloon_inflate_tube,
740 init_balloon_size,
741 VmMemoryClient::new(dynamic_mapping_device_tube),
742 balloon_features,
743 #[cfg(feature = "registered_events")]
744 Some(
745 registered_evt_q
746 .try_clone()
747 .context("failed to clone registered_evt_q tube")?,
748 ),
749 cfg.balloon_ws_num_bins,
750 )?);
751 }
752
753 #[cfg(feature = "net")]
754 for opt in &cfg.net {
755 let dev =
756 opt.create_virtio_device_and_jail(cfg.protection_type, cfg.jail_config.as_ref())?;
757 devs.push(dev);
758 }
759
760 #[cfg(feature = "audio")]
761 {
762 for (card_index, virtio_snd) in cfg.virtio_snds.iter().enumerate() {
763 let (snd_host_tube, snd_device_tube) =
764 Tube::pair().context("failed to create tube for snd")?;
765 add_control_tube(DeviceControlTube::Snd(snd_host_tube).into());
766 let mut snd_params = virtio_snd.clone();
767 snd_params.card_index = card_index;
768 devs.push(create_virtio_snd_device(
769 cfg.protection_type,
770 cfg.jail_config.as_ref(),
771 snd_params,
772 snd_device_tube,
773 )?);
774 }
775 }
776
777 #[cfg(any(target_os = "android", target_os = "linux"))]
778 #[cfg(feature = "media")]
779 {
780 for v4l2_device in &cfg.v4l2_proxy {
781 devs.push(create_v4l2_device(cfg.protection_type, v4l2_device)?);
782 }
783 }
784
785 #[cfg(feature = "media")]
786 if cfg.simple_media_device {
787 devs.push(create_simple_media_device(cfg.protection_type)?);
788 }
789
790 #[cfg(all(feature = "media", feature = "video-decoder"))]
791 {
792 for (tube, backend) in media_adapter_cfg {
793 devs.push(create_virtio_media_adapter(
794 cfg.protection_type,
795 cfg.jail_config.as_ref(),
796 tube,
797 backend,
798 )?);
799 }
800 }
801
802 #[cfg(feature = "video-decoder")]
803 {
804 for (tube, backend) in video_dec_cfg {
805 register_video_device(
806 backend,
807 &mut devs,
808 tube,
809 cfg.protection_type,
810 cfg.jail_config.as_ref(),
811 VideoDeviceType::Decoder,
812 )?;
813 }
814 }
815
816 #[cfg(feature = "video-encoder")]
817 {
818 for (tube, backend) in video_enc_cfg {
819 register_video_device(
820 backend,
821 &mut devs,
822 tube,
823 cfg.protection_type,
824 cfg.jail_config.as_ref(),
825 VideoDeviceType::Encoder,
826 )?;
827 }
828 }
829
830 if let Some(vsock_config) = &cfg.vsock {
831 devs.push(
832 vsock_config
833 .create_virtio_device_and_jail(cfg.protection_type, cfg.jail_config.as_ref())?,
834 );
835 }
836
837 #[cfg(target_arch = "aarch64")]
838 {
839 if cfg.vhost_scmi {
840 devs.push(create_vhost_scmi_device(
841 cfg.protection_type,
842 cfg.jail_config.as_ref(),
843 cfg.vhost_scmi_device.clone(),
844 )?);
845 }
846 }
847
848 for shared_dir in &cfg.shared_dirs {
849 let SharedDir {
850 src,
851 tag,
852 kind,
853 ugid,
854 uid_map,
855 gid_map,
856 fs_cfg,
857 p9_cfg,
858 } = shared_dir;
859
860 let dev = match kind {
861 SharedDirKind::FS => {
862 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
863 add_control_tube(TaggedControlTube::Fs(host_tube).into());
864
865 create_fs_device(
866 cfg.protection_type,
867 cfg.jail_config.as_ref(),
868 *ugid,
869 uid_map,
870 gid_map,
871 src,
872 tag,
873 fs_cfg.clone(),
874 device_tube,
875 )?
876 }
877 SharedDirKind::P9 => create_9p_device(
878 cfg.protection_type,
879 cfg.jail_config.as_ref(),
880 *ugid,
881 uid_map,
882 gid_map,
883 src,
884 tag,
885 p9_cfg.clone(),
886 )?,
887 };
888 devs.push(dev);
889 }
890
891 #[cfg(feature = "audio")]
892 if let Some(path) = &cfg.sound {
893 devs.push(create_sound_device(
894 path,
895 cfg.protection_type,
896 cfg.jail_config.as_ref(),
897 )?);
898 }
899
900 for opt in &cfg.vhost_user {
901 devs.push(create_vhost_user_frontend(
902 cfg.protection_type,
903 opt,
904 cfg.vhost_user_connect_timeout_ms,
905 vm_evt_wrtube.try_clone()?,
906 )?);
907 }
908
909 Ok(devs)
910}
911
912fn create_devices(
913 cfg: &Config,
914 vm: &mut impl VmArch,
915 resources: &mut SystemAllocator,
916 add_control_tube: &mut impl FnMut(AnyControlTube),
917 vm_evt_wrtube: &SendTube,
918 iommu_attached_endpoints: &mut BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
919 #[cfg(feature = "usb")] usb_provider: DeviceProvider,
920 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
921 iova_max_addr: &mut Option<u64>,
922 #[cfg(feature = "registered_events")] registered_evt_q: &SendTube,
923 vfio_container_manager: &mut VfioContainerManager,
924 worker_process_pids: &mut BTreeSet<Pid>,
926) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
927 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
928 #[cfg(feature = "balloon")]
929 let mut balloon_inflate_tube: Option<Tube> = None;
930 #[cfg(feature = "gpu")]
931 let mut has_vfio_gfx_device = false;
932 if !cfg.vfio.is_empty() {
933 let mut coiommu_attached_endpoints = Vec::new();
934
935 for vfio_dev in &cfg.vfio {
936 let (dev, jail, viommu_mapper) = create_vfio_device(
937 cfg.jail_config.as_ref(),
938 vm,
939 resources,
940 add_control_tube,
941 &vfio_dev.path,
942 false,
943 None,
944 vfio_dev.guest_address,
945 Some(&mut coiommu_attached_endpoints),
946 vfio_dev.iommu,
947 vfio_dev.dt_symbol.clone(),
948 vfio_container_manager,
949 )?;
950 match dev {
951 VfioDeviceVariant::Pci(vfio_pci_device) => {
952 *iova_max_addr = Some(max(
953 vfio_pci_device.get_max_iova(),
954 iova_max_addr.unwrap_or(0),
955 ));
956
957 #[cfg(feature = "gpu")]
958 if vfio_pci_device.is_gfx() {
959 has_vfio_gfx_device = true;
960 }
961
962 if let Some(viommu_mapper) = viommu_mapper {
963 iommu_attached_endpoints.insert(
964 vfio_pci_device
965 .pci_address()
966 .context("not initialized")?
967 .to_u32(),
968 Arc::new(Mutex::new(Box::new(viommu_mapper))),
969 );
970 }
971
972 devices.push((Box::new(vfio_pci_device), jail));
973 }
974 VfioDeviceVariant::Platform(vfio_plat_dev) => {
975 devices.push((Box::new(vfio_plat_dev), jail));
976 }
977 }
978 }
979
980 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
981 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
982 let res = unsafe { libc::getrlimit64(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
984 if res == 0 {
985 let limit = unsafe { buf.assume_init() };
987 let rlim_new = limit.rlim_cur.saturating_add(vm.get_memory().memory_size());
988 let rlim_max = max(limit.rlim_max, rlim_new);
989 if limit.rlim_cur < rlim_new {
990 let limit_arg = libc::rlimit64 {
991 rlim_cur: rlim_new,
992 rlim_max,
993 };
994 let res = unsafe { libc::setrlimit64(libc::RLIMIT_MEMLOCK, &limit_arg) };
996 if res != 0 {
997 bail!("Set rlimit failed");
998 }
999 }
1000 } else {
1001 bail!("Get rlimit failed");
1002 }
1003 }
1004 #[cfg(feature = "balloon")]
1005 let coiommu_tube: Option<Tube>;
1006 #[cfg(not(feature = "balloon"))]
1007 let coiommu_tube: Option<Tube> = None;
1008 if !coiommu_attached_endpoints.is_empty() {
1009 let vfio_container = vfio_container_manager
1010 .get_container(IommuDevType::CoIommu, None as Option<&Path>)
1011 .context("failed to get vfio container")?;
1012 let (coiommu_host_tube, coiommu_device_tube) =
1013 Tube::pair().context("failed to create coiommu tube")?;
1014 add_control_tube(
1015 VmMemoryTube {
1016 tube: coiommu_host_tube,
1017 expose_with_viommu: false,
1018 }
1019 .into(),
1020 );
1021 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
1022 #[cfg(feature = "balloon")]
1023 match Tube::pair() {
1024 Ok((x, y)) => {
1025 coiommu_tube = Some(x);
1026 balloon_inflate_tube = Some(y);
1027 }
1028 Err(x) => return Err(x).context("failed to create coiommu tube"),
1029 }
1030 let dev = CoIommuDev::new(
1031 vm.get_memory().clone(),
1032 vfio_container,
1033 VmMemoryClient::new(coiommu_device_tube),
1034 coiommu_tube,
1035 coiommu_attached_endpoints,
1036 vcpu_count,
1037 cfg.coiommu_param.unwrap_or_default(),
1038 )
1039 .context("failed to create coiommu device")?;
1040
1041 devices.push((
1042 Box::new(dev),
1043 simple_jail(cfg.jail_config.as_ref(), "coiommu_device")?,
1044 ));
1045 }
1046 }
1047
1048 let stubs = create_virtio_devices(
1049 cfg,
1050 vm,
1051 resources,
1052 add_control_tube,
1053 vm_evt_wrtube,
1054 #[cfg(feature = "balloon")]
1055 balloon_inflate_tube,
1056 worker_process_pids,
1057 #[cfg(feature = "gpu")]
1058 render_server_fd,
1059 #[cfg(feature = "gpu")]
1060 has_vfio_gfx_device,
1061 #[cfg(feature = "registered_events")]
1062 registered_evt_q,
1063 )?;
1064
1065 for stub in stubs {
1066 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1067 add_control_tube(AnyControlTube::IrqTube(msi_host_tube));
1068
1069 let shared_memory_tube = if stub.dev.get_shared_memory_region().is_some() {
1070 let (host_tube, device_tube) =
1071 Tube::pair().context("failed to create shared memory tube")?;
1072 add_control_tube(
1073 VmMemoryTube {
1074 tube: host_tube,
1075 expose_with_viommu: stub.dev.expose_shmem_descriptors_with_viommu(),
1076 }
1077 .into(),
1078 );
1079 Some(device_tube)
1080 } else {
1081 None
1082 };
1083
1084 let (ioevent_host_tube, ioevent_device_tube) =
1085 Tube::pair().context("failed to create ioevent tube")?;
1086 add_control_tube(
1087 VmMemoryTube {
1088 tube: ioevent_host_tube,
1089 expose_with_viommu: false,
1090 }
1091 .into(),
1092 );
1093
1094 let (host_tube, device_tube) =
1095 Tube::pair().context("failed to create device control tube")?;
1096 add_control_tube(TaggedControlTube::Vm(host_tube).into());
1097
1098 let dev = VirtioPciDevice::new(
1099 vm.get_memory().clone(),
1100 stub.dev,
1101 msi_device_tube,
1102 cfg.disable_virtio_intx,
1103 shared_memory_tube.map(VmMemoryClient::new),
1104 VmMemoryClient::new(ioevent_device_tube),
1105 device_tube,
1106 )
1107 .context("failed to create virtio pci dev")?;
1108
1109 devices.push((Box::new(dev) as Box<dyn BusDeviceObj>, stub.jail));
1110 }
1111
1112 #[cfg(feature = "usb")]
1113 if cfg.usb {
1114 let usb_controller = Box::new(XhciController::new(
1116 vm.get_memory().clone(),
1117 Box::new(usb_provider),
1118 ));
1119 devices.push((
1120 usb_controller,
1121 simple_jail(cfg.jail_config.as_ref(), "xhci_device")?,
1122 ));
1123 }
1124
1125 for params in &cfg.stub_pci_devices {
1126 devices.push((Box::new(StubPciDevice::new(params)), None));
1128 }
1129
1130 devices.push((
1131 Box::new(PvPanicPciDevice::new(vm_evt_wrtube.try_clone()?)),
1132 None,
1133 ));
1134
1135 Ok(devices)
1136}
1137
1138fn create_mmio_file_backed_mappings(
1139 cfg: &Config,
1140 vm: &mut impl Vm,
1141 resources: &mut SystemAllocator,
1142) -> Result<()> {
1143 for mapping in &cfg.file_backed_mappings_mmio {
1144 let file = mapping
1145 .open()
1146 .context("failed to open file for file-backed mapping")?;
1147 let prot = if mapping.writable {
1148 Protection::read_write()
1149 } else {
1150 Protection::read()
1151 };
1152 let size = mapping
1153 .size
1154 .try_into()
1155 .context("Invalid size for file-backed mapping")?;
1156 let memory_mapping = MemoryMappingBuilder::new(size)
1157 .from_file(&file)
1158 .offset(mapping.offset)
1159 .protection(prot)
1160 .build()
1161 .context("failed to map backing file for file-backed mapping")?;
1162
1163 let mapping_range = AddressRange::from_start_and_size(mapping.address, mapping.size)
1164 .context("failed to convert to AddressRange")?;
1165 match resources.mmio_allocator_any().allocate_at(
1166 mapping_range,
1167 Alloc::FileBacked(mapping.address),
1168 "file-backed mapping".to_owned(),
1169 ) {
1170 Ok(()) | Err(resources::Error::OutOfSpace) => {}
1175 e => e.context("failed to allocate guest address for file-backed mapping")?,
1176 }
1177
1178 vm.add_memory_region(
1179 GuestAddress(mapping.address),
1180 Box::new(memory_mapping),
1181 !mapping.writable,
1182 false,
1183 MemCacheType::CacheCoherent,
1184 )
1185 .context("failed to configure file-backed mapping")?;
1186 }
1187
1188 Ok(())
1189}
1190
/// Bookkeeping collected while virtual PCIe root ports are created; built and returned by
/// `create_pure_virtual_pcie_root_port`.
#[cfg(target_arch = "x86_64")]
struct HotPlugStub {
    /// Root ports registered as hot-plug buses, keyed by the bus number they were created for.
    hotplug_buses: BTreeMap<u8, Arc<Mutex<dyn HotPlugBus>>>,
    /// One inclusive range of `PciAddress::to_u32()` values per hot-plug bridge, running from the
    /// bridge's secondary bus to its subordinate bus.
    /// NOTE(review): the name suggests these are consumed by IOMMU setup — confirm at the caller.
    iommu_bus_ranges: Vec<RangeInclusive<u32>>,
    /// GPE notification receivers. Not populated by `create_pure_virtual_pcie_root_port`;
    /// presumably keyed by GPE number and filled in elsewhere — TODO confirm.
    gpe_notify_devs: BTreeMap<u32, Arc<Mutex<dyn GpeNotify>>>,
    /// PME notification receivers keyed by bus number; every root port created by
    /// `create_pure_virtual_pcie_root_port` is registered here.
    pme_notify_devs: BTreeMap<u8, Arc<Mutex<dyn PmeNotify>>>,
}
1203
1204#[cfg(target_arch = "x86_64")]
1205impl HotPlugStub {
1206 fn new() -> Self {
1208 Self {
1209 hotplug_buses: BTreeMap::new(),
1210 iommu_bus_ranges: Vec::new(),
1211 gpe_notify_devs: BTreeMap::new(),
1212 pme_notify_devs: BTreeMap::new(),
1213 }
1214 }
1215}
1216
1217#[cfg(target_arch = "x86_64")]
1218fn create_pure_virtual_pcie_root_port(
1223 sys_allocator: &mut SystemAllocator,
1224 add_control_tube: &mut impl FnMut(AnyControlTube),
1225 devices: &mut Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
1226 hp_bus_count: u8,
1227) -> Result<HotPlugStub> {
1228 let mut hp_sec_buses = Vec::new();
1229 let mut hp_stub = HotPlugStub::new();
1230 for i in 1..255 {
1233 if sys_allocator.pci_bus_empty(i) {
1234 if hp_sec_buses.len() < hp_bus_count.into() {
1235 hp_sec_buses.push(i);
1236 }
1237 continue;
1238 }
1239 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(i, false)));
1240 hp_stub
1241 .pme_notify_devs
1242 .insert(i, pcie_root_port.clone() as Arc<Mutex<dyn PmeNotify>>);
1243 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1244 add_control_tube(AnyControlTube::IrqTube(msi_host_tube));
1245 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
1246 devices.push((pci_bridge, None));
1248 }
1249
1250 if hp_sec_buses.len() < hp_bus_count.into() {
1252 return Err(anyhow!("no more addresses are available"));
1253 }
1254
1255 for hp_sec_bus in hp_sec_buses {
1256 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(hp_sec_bus, true)));
1257 hp_stub.pme_notify_devs.insert(
1258 hp_sec_bus,
1259 pcie_root_port.clone() as Arc<Mutex<dyn PmeNotify>>,
1260 );
1261 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1262 add_control_tube(AnyControlTube::IrqTube(msi_host_tube));
1263 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
1264
1265 hp_stub.iommu_bus_ranges.push(RangeInclusive::new(
1266 PciAddress {
1267 bus: pci_bridge.get_secondary_num(),
1268 dev: 0,
1269 func: 0,
1270 }
1271 .to_u32(),
1272 PciAddress {
1273 bus: pci_bridge.get_subordinate_num(),
1274 dev: 32,
1275 func: 8,
1276 }
1277 .to_u32(),
1278 ));
1279
1280 devices.push((pci_bridge, None));
1281 hp_stub
1282 .hotplug_buses
1283 .insert(hp_sec_bus, pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
1284 }
1285 Ok(hp_stub)
1286}
1287
1288fn get_representative_pcpu(vcpu_id: usize, vcpu_affinity: &Option<VcpuAffinity>) -> usize {
1293 match vcpu_affinity {
1294 Some(VcpuAffinity::Global(s)) => s.iter().next().copied().unwrap_or(0),
1296 Some(VcpuAffinity::PerVcpu(m)) => match m.get(&vcpu_id) {
1297 Some(s) => s.iter().next().copied().unwrap_or(vcpu_id),
1298 None => vcpu_id,
1299 },
1300 None => vcpu_id,
1301 }
1302}
1303
1304fn map_vcpu_capacity(
1307 vcpu_count: usize,
1308 vcpu_affinity: &Option<VcpuAffinity>,
1309 host_capacity: &BTreeMap<usize, u32>,
1310) -> anyhow::Result<BTreeMap<usize, u32>> {
1311 let mut mapped_capacity = BTreeMap::new();
1312 for vcpu_id in 0..vcpu_count {
1313 let pcpu_id = get_representative_pcpu(vcpu_id, vcpu_affinity);
1314 let capacity = host_capacity
1315 .get(&pcpu_id)
1316 .copied()
1317 .unwrap_or(DEFAULT_CPU_CAPACITY);
1318 mapped_capacity.insert(vcpu_id, capacity);
1319 }
1320 Ok(mapped_capacity)
1321}
1322
1323fn map_vcpu_clusters(
1326 vcpu_count: usize,
1327 vcpu_affinity: &Option<VcpuAffinity>,
1328 host_clusters: Vec<arch::CpuSet>,
1329) -> anyhow::Result<Vec<arch::CpuSet>> {
1330 let mut pcpu_to_cluster = std::collections::BTreeMap::new();
1331 for (cluster_idx, cluster) in host_clusters.iter().enumerate() {
1332 for pcpu_id in cluster.iter() {
1333 pcpu_to_cluster.insert(*pcpu_id, cluster_idx);
1334 }
1335 }
1336
1337 let mut vcpu_clusters_sets: Vec<std::collections::BTreeSet<usize>> =
1338 vec![std::collections::BTreeSet::new(); host_clusters.len()];
1339
1340 for vcpu_id in 0..vcpu_count {
1341 let pcpu_id = get_representative_pcpu(vcpu_id, vcpu_affinity);
1342
1343 if let Some(&cluster_idx) = pcpu_to_cluster.get(&pcpu_id) {
1344 vcpu_clusters_sets[cluster_idx].insert(vcpu_id);
1345 }
1346 }
1347
1348 Ok(vcpu_clusters_sets
1349 .into_iter()
1350 .filter(|s| !s.is_empty())
1351 .map(arch::CpuSet::new)
1352 .collect())
1353}
1354
1355fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
1356 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1357 Some(
1358 open_file_or_duplicate(initrd_path, OpenOptions::new().read(true))
1359 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
1360 )
1361 } else {
1362 None
1363 };
1364 let pvm_fw_image = if let Some(pvm_fw_path) = &cfg.pvm_fw {
1365 Some(
1366 open_file_or_duplicate(pvm_fw_path, OpenOptions::new().read(true))
1367 .with_context(|| format!("failed to open pvm_fw {}", pvm_fw_path.display()))?,
1368 )
1369 } else {
1370 None
1371 };
1372
1373 let vm_image = match cfg.executable_path {
1374 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1375 open_file_or_duplicate(kernel_path, OpenOptions::new().read(true)).with_context(
1376 || format!("failed to open kernel image {}", kernel_path.display()),
1377 )?,
1378 ),
1379 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1380 open_file_or_duplicate(bios_path, OpenOptions::new().read(true))
1381 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
1382 ),
1383 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1384 };
1385
1386 let swiotlb = if let Some(size) = cfg.swiotlb {
1387 Some(
1388 size.checked_mul(1024 * 1024)
1389 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
1390 )
1391 } else if matches!(cfg.protection_type, ProtectionType::Unprotected) {
1392 None
1393 } else {
1394 Some(64 * 1024 * 1024)
1395 };
1396
1397 let (pflash_image, pflash_block_size) = if let Some(pflash_parameters) = &cfg.pflash_parameters
1398 {
1399 (
1400 Some(
1401 open_file_or_duplicate(
1402 &pflash_parameters.path,
1403 OpenOptions::new().read(true).write(true),
1404 )
1405 .with_context(|| {
1406 format!("failed to open pflash {}", pflash_parameters.path.display())
1407 })?,
1408 ),
1409 pflash_parameters.block_size,
1410 )
1411 } else {
1412 (None, 0)
1413 };
1414
1415 #[allow(unused_mut)]
1417 let mut vcpu_frequencies: BTreeMap<usize, Vec<u32>> = BTreeMap::new();
1418 #[cfg(target_arch = "aarch64")]
1419 let mut normalized_cpu_ipc_ratios = BTreeMap::new();
1420
1421 let fw_cfg_enable = cfg.enable_fw_cfg || !cfg.fw_cfg_parameters.is_empty();
1423 let (vcpu_clusters, vcpu_capacity) = if cfg.host_cpu_topology {
1424 let host_capacity = Arch::get_host_cpu_capacity()?;
1425 let mapped_capacity = map_vcpu_capacity(
1426 cfg.vcpu_count.unwrap_or(1),
1427 &cfg.vcpu_affinity,
1428 &host_capacity,
1429 )?;
1430
1431 let host_clusters = Arch::get_host_cpu_clusters()?;
1432 let mapped_clusters = map_vcpu_clusters(
1433 cfg.vcpu_count.unwrap_or(1),
1434 &cfg.vcpu_affinity,
1435 host_clusters,
1436 )?;
1437
1438 (mapped_clusters, mapped_capacity)
1439 } else {
1440 (cfg.cpu_clusters.clone(), cfg.cpu_capacity.clone())
1441 };
1442
1443 #[cfg(target_arch = "aarch64")]
1444 let cpu_ipc_ratio = if cfg.host_cpu_topology {
1445 &vcpu_capacity
1446 } else {
1447 &cfg.cpu_ipc_ratio
1448 };
1449
1450 #[cfg(target_arch = "aarch64")]
1451 let mut vcpu_domain_paths = BTreeMap::new();
1452 #[cfg(target_arch = "aarch64")]
1453 let mut vcpu_domains = BTreeMap::new();
1454
1455 #[cfg(target_arch = "aarch64")]
1456 if cfg.virt_cpufreq || cfg.virt_cpufreq_v2 {
1457 if !cfg.cpu_frequencies_khz.is_empty() {
1458 vcpu_frequencies = cfg.cpu_frequencies_khz.clone();
1459 } else {
1460 match Arch::get_host_cpu_frequencies_khz() {
1461 Ok(host_cpu_frequencies) => {
1462 for vcpu_id in 0..cfg.vcpu_count.unwrap_or(1) {
1463 let vcpu_affinity = match cfg.vcpu_affinity.clone() {
1464 Some(VcpuAffinity::Global(v)) => v,
1465 Some(VcpuAffinity::PerVcpu(mut m)) => {
1466 m.remove(&vcpu_id).unwrap_or_default()
1467 }
1468 None => {
1469 panic!("There must be some vcpu_affinity setting with VirtCpufreq enabled!")
1470 }
1471 };
1472
1473 if let Some(freq_domain) = host_cpu_frequencies.get(&vcpu_affinity[0]) {
1476 for cpu in vcpu_affinity.iter() {
1477 if let Some(frequencies) = host_cpu_frequencies.get(cpu) {
1478 if frequencies != freq_domain {
1479 panic!("Affined CPUs do not share a frequency domain!");
1480 }
1481 }
1482 }
1483 vcpu_frequencies.insert(vcpu_id, freq_domain.clone());
1484 } else {
1485 panic!("No frequency domain for vcpu:{vcpu_id}");
1486 }
1487 }
1488 }
1489 Err(e) => {
1490 warn!("Unable to get host cpu frequencies {:#}", e);
1491 }
1492 }
1493 }
1494
1495 if !vcpu_frequencies.is_empty() {
1496 let host_max_freqs = Arch::get_host_cpu_max_freq_khz()?;
1497 let host_max_freq = host_max_freqs.values().copied().max().unwrap_or_default();
1500
1501 normalized_cpu_ipc_ratios = normalize_cpu_ipc_ratios(
1502 vcpu_frequencies.iter().map(|(vcpu_id, frequencies)| {
1503 (
1504 *vcpu_id,
1505 frequencies.iter().copied().max().unwrap_or_default(),
1506 )
1507 }),
1508 host_max_freq,
1509 |vcpu_id| {
1510 cpu_ipc_ratio
1511 .get(&vcpu_id)
1512 .copied()
1513 .unwrap_or(DEFAULT_CPU_CAPACITY)
1514 },
1515 )?;
1516
1517 if !cfg.cpu_freq_domains.is_empty() {
1518 let cgroup_path = cfg
1519 .vcpu_cgroup_path
1520 .clone()
1521 .context("cpu_freq_domains requires vcpu_cgroup_path")?;
1522
1523 if !cgroup_path.join("cgroup.controllers").exists() {
1524 panic!("CGroupsV2 must be enabled for cpu freq domain support!");
1525 }
1526
1527 let cgroup_procs_path = cgroup_path.join("cgroup.procs");
1529 std::fs::write(
1530 cgroup_procs_path.clone(),
1531 process::id().to_string().as_bytes(),
1532 )
1533 .with_context(|| {
1534 format!(
1535 "failed to create vcpu-cgroup-path {}",
1536 cgroup_procs_path.display(),
1537 )
1538 })?;
1539
1540 for (freq_domain_idx, cpus) in cfg.cpu_freq_domains.iter().enumerate() {
1541 let vcpu_domain_path =
1542 cgroup_path.join(format!("vcpu-domain{freq_domain_idx}"));
1543 create_dir_all(&vcpu_domain_path)?;
1545
1546 let cgroup_type_path = cgroup_path.join(vcpu_domain_path.join("cgroup.type"));
1549 std::fs::write(cgroup_type_path.clone(), b"threaded").with_context(|| {
1550 format!(
1551 "failed to create vcpu-cgroup-path {}",
1552 cgroup_type_path.display(),
1553 )
1554 })?;
1555 for core_idx in cpus.iter() {
1556 vcpu_domain_paths.insert(*core_idx, vcpu_domain_path.clone());
1557 vcpu_domains.insert(*core_idx, freq_domain_idx as u32);
1558 }
1559 }
1560 }
1561 }
1562 }
1563
1564 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
1565 let vcpu_properties = arch::derive_vcpu_properties(
1566 vcpu_count,
1567 &vcpu_capacity,
1568 &cfg.dynamic_power_coefficient,
1569 &vcpu_frequencies,
1570 #[cfg(all(
1571 target_arch = "aarch64",
1572 any(target_os = "android", target_os = "linux")
1573 ))]
1574 &normalized_cpu_ipc_ratios,
1575 #[cfg(all(
1576 target_arch = "aarch64",
1577 any(target_os = "android", target_os = "linux")
1578 ))]
1579 &vcpu_domains,
1580 #[cfg(all(
1581 target_arch = "aarch64",
1582 any(target_os = "android", target_os = "linux")
1583 ))]
1584 &vcpu_domain_paths,
1585 );
1586
1587 Ok(VmComponents {
1588 #[cfg(target_arch = "x86_64")]
1589 ac_adapter: cfg.ac_adapter,
1590 #[cfg(target_arch = "x86_64")]
1591 break_linux_pci_config_io: cfg.break_linux_pci_config_io,
1592 memory_size: cfg
1593 .memory
1594 .unwrap_or(256)
1595 .checked_mul(1024 * 1024)
1596 .ok_or_else(|| anyhow!("requested memory size too large"))?,
1597 swiotlb,
1598 fw_cfg_enable,
1599 bootorder_fw_cfg_blob: Vec::new(),
1600 vcpu_properties,
1601 vcpu_affinity: cfg.vcpu_affinity.clone(),
1602 fw_cfg_parameters: cfg.fw_cfg_parameters.clone(),
1603 vcpu_clusters,
1604 dev_pm: cfg.dev_pm,
1605 no_smt: cfg.no_smt,
1606 hugepages: cfg.hugepages,
1607 hv_cfg: hypervisor::Config {
1608 #[cfg(target_arch = "aarch64")]
1609 mte: cfg.mte,
1610 protection_type: cfg.protection_type,
1611 #[cfg(all(target_os = "android", target_arch = "aarch64"))]
1612 ffa: cfg.ffa.map(|g| g.auto).unwrap_or(false),
1613 force_disable_readonly_mem: cfg.force_disable_readonly_mem,
1614 },
1615 vm_image,
1616 android_fstab: cfg
1617 .android_fstab
1618 .as_ref()
1619 .map(|x| {
1620 File::open(x)
1621 .with_context(|| format!("failed to open android fstab file {}", x.display()))
1622 })
1623 .map_or(Ok(None), |v| v.map(Some))?,
1624 pstore: cfg.pstore.clone(),
1625 pflash_block_size,
1626 pflash_image,
1627 initrd_image,
1628 extra_kernel_params: cfg.params.clone(),
1629 acpi_sdts: cfg
1630 .acpi_tables
1631 .iter()
1632 .map(|path| {
1633 SDT::from_file(path)
1634 .with_context(|| format!("failed to open ACPI file {}", path.display()))
1635 })
1636 .collect::<Result<Vec<SDT>>>()?,
1637 rt_cpus: cfg.rt_cpus.clone(),
1638 delay_rt: cfg.delay_rt,
1639 no_i8042: cfg.no_i8042,
1640 no_rtc: cfg.no_rtc,
1641 #[cfg(target_arch = "x86_64")]
1642 smbios: cfg.smbios.clone(),
1643 host_cpu_topology: cfg.host_cpu_topology,
1644 itmt: cfg.itmt,
1645 #[cfg(target_arch = "x86_64")]
1646 force_s2idle: cfg.force_s2idle,
1647 pvm_fw: pvm_fw_image,
1648 pci_config: cfg.pci_config,
1649 boot_cpu: cfg.boot_cpu,
1650 vfio_platform_pm: cfg.vfio_platform_pm,
1651 #[cfg(target_arch = "aarch64")]
1652 virt_cpufreq_v2: cfg.virt_cpufreq_v2,
1653 smccc_trng: cfg.smccc_trng,
1654 #[cfg(target_arch = "aarch64")]
1655 sve_config: cfg.sve.unwrap_or_default(),
1656 })
1657}
1658
/// Scales each CPU's IPC ratio by how its maximum frequency compares with the host maximum.
///
/// For every `(cpu_id, max_freq)` pair the result holds
/// `cpu_ipc_ratio(cpu_id) * max_freq / host_max_freq`, computed in 64-bit integer arithmetic
/// (the division truncates) and then narrowed back to `u32`.
///
/// # Errors
///
/// Fails when `host_max_freq` is zero, or when a normalized value does not fit in `u32`.
#[cfg(target_arch = "aarch64")]
fn normalize_cpu_ipc_ratios(
    max_frequency_per_cpu: impl Iterator<Item = (usize, u32)>,
    host_max_freq: u32,
    cpu_ipc_ratio: impl Fn(usize) -> u32,
) -> Result<BTreeMap<usize, u32>> {
    if host_max_freq == 0 {
        bail!("invalid host_max_freq 0");
    }
    let divisor = u64::from(host_max_freq);

    max_frequency_per_cpu
        .map(|(cpu_id, cpu_max_freq)| {
            // Widened to u64 so the product of two u32 values cannot overflow.
            let scaled = u64::from(cpu_ipc_ratio(cpu_id)) * u64::from(cpu_max_freq) / divisor;
            u32::try_from(scaled)
                .context("normalized CPU IPC ratio out of u32 range")
                .map(|normalized| (cpu_id, normalized))
        })
        .collect()
}
1686
/// Why a VM run ended; the success value returned by `run_config` and the per-hypervisor
/// `run_*` functions.
///
/// NOTE(review): the per-variant notes below are inferred from the variant names only — confirm
/// against the code that produces them.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ExitState {
    /// Presumably: the guest asked to be reset.
    Reset,
    /// Presumably: the VM was stopped or shut down normally.
    Stop,
    /// Presumably: the VM crashed.
    Crash,
    /// Presumably: the guest kernel reported a panic.
    GuestPanic,
    /// Presumably: a watchdog triggered a reset.
    WatchdogReset,
}
1695
1696fn punch_holes_in_guest_mem_layout_for_mappings(
1699 guest_mem_layout: Vec<(GuestAddress, u64, MemoryRegionOptions)>,
1700 file_backed_mappings_ram: &[FileBackedMappingParameters],
1701) -> Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>> {
1702 let mut layout_set = BTreeSet::new();
1705 for (addr, size, options) in &guest_mem_layout {
1706 layout_set.insert((addr.offset(), addr.offset() + size, options.clone()));
1707 }
1708
1709 for mapping in file_backed_mappings_ram {
1713 anyhow::ensure!(
1714 layout_set
1715 .iter()
1716 .any(|(addr, size, _)| *addr <= mapping.address
1717 && mapping.address + mapping.size <= *addr + *size),
1718 "RAM file-backed-mapping must be a subset of a RAM region"
1719 );
1720 }
1721
1722 for mapping in file_backed_mappings_ram.iter().cloned() {
1723 let mapping_start = mapping.address;
1724 let mapping_end = mapping_start + mapping.size;
1725 let mut purpose = None;
1726 while let Some((range_start, range_end, options)) = layout_set
1728 .iter()
1729 .find(|&&(range_start, range_end, _)| {
1730 mapping_start < range_end && mapping_end > range_start
1731 })
1732 .cloned()
1733 {
1734 let purpose = *purpose.get_or_insert(options.purpose);
1735 anyhow::ensure!(
1736 options.purpose == purpose,
1737 "RAM file-backed-mapping cannot span regions with different purposes: {:?} vs {:?}",
1738 options.purpose,
1739 purpose
1740 );
1741
1742 layout_set.remove(&(range_start, range_end, options.clone()));
1743
1744 if range_start < mapping_start {
1745 layout_set.insert((range_start, mapping_start, options.clone()));
1746 }
1747 if range_end > mapping_end {
1748 layout_set.insert((mapping_end, range_end, options));
1749 }
1750 }
1751 layout_set.insert((
1752 mapping_start,
1753 mapping_end,
1754 MemoryRegionOptions::new()
1755 .purpose(purpose.unwrap())
1756 .file_backed(mapping),
1757 ));
1758 }
1759
1760 Ok(layout_set
1762 .into_iter()
1763 .map(|(start, end, options)| (GuestAddress(start), end - start, options))
1764 .collect())
1765}
1766
1767fn create_guest_memory(
1768 cfg: &Config,
1769 components: &VmComponents,
1770 arch_memory_layout: &<Arch as LinuxArch>::ArchMemoryLayout,
1771 hypervisor: &impl Hypervisor,
1772) -> Result<GuestMemory> {
1773 let guest_mem_layout = Arch::guest_memory_layout(components, arch_memory_layout, hypervisor)
1774 .context("failed to create guest memory layout")?;
1775
1776 let guest_mem_layout = punch_holes_in_guest_mem_layout_for_mappings(
1777 guest_mem_layout,
1778 &cfg.file_backed_mappings_ram,
1779 )?;
1780
1781 let mut guest_mem = GuestMemory::new_with_options(&guest_mem_layout)
1782 .context("failed to create guest memory")?;
1783 let mut mem_policy = MemoryPolicy::empty();
1784 if components.hugepages {
1785 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
1786 }
1787
1788 if cfg.lock_guest_memory {
1789 mem_policy |= MemoryPolicy::LOCK_GUEST_MEMORY;
1790 }
1791 if cfg.jail_config.is_none() {
1794 mem_policy |= MemoryPolicy::USE_PUNCHHOLE_LOCKED;
1795 }
1796 guest_mem.set_memory_policy(mem_policy);
1797
1798 if cfg.unmap_guest_memory_on_fork {
1799 guest_mem.use_dontfork().context("use_dontfork failed")?;
1806 }
1807
1808 Ok(guest_mem)
1809}
1810
/// Boots the configured guest on the GenieZone hypervisor.
///
/// `device_path` selects the hypervisor device node; `GENIEZONE_PATH` is used when it is `None`.
/// Only the in-kernel IRQ chip is accepted, and no IOAPIC tube is handed to `run_vm`.
///
/// # Errors
///
/// Fails if the device cannot be opened, if memory or the VM cannot be created, if a protected
/// VM was requested but the VM does not report `VmCap::Protected`, if the split or userspace
/// IRQ chip was selected, or if `run_vm` itself fails.
#[cfg(all(target_arch = "aarch64", feature = "geniezone"))]
fn run_gz(device_path: Option<&Path>, cfg: Config, components: VmComponents) -> Result<ExitState> {
    use devices::GeniezoneKernelIrqChip;
    use hypervisor::geniezone::Geniezone;
    use hypervisor::geniezone::GeniezoneVcpu;
    use hypervisor::geniezone::GeniezoneVm;

    let gz_device = device_path.unwrap_or_else(|| Path::new(GENIEZONE_PATH));
    let geniezone = Geniezone::new_with_path(gz_device)
        .with_context(|| format!("failed to open GenieZone device {}", gz_device.display()))?;

    let memory_layout =
        Arch::arch_memory_layout(&components).context("failed to create arch memory layout")?;
    let guest_memory = create_guest_memory(&cfg, &components, &memory_layout, &geniezone)?;

    // The swap controller, when configured, is launched before the VM object is created.
    #[cfg(feature = "swap")]
    let swap_controller = cfg
        .swap_dir
        .as_ref()
        .map(|swap_dir| {
            SwapController::launch(guest_memory.clone(), swap_dir, cfg.jail_config.as_ref())
                .context("launch vmm-swap monitor process")
        })
        .transpose()?;

    let vm = GeniezoneVm::new(&geniezone, guest_memory, components.hv_cfg)
        .context("failed to create vm")?;

    anyhow::ensure!(
        !cfg.protection_type.isolates_memory() || vm.check_capability(VmCap::Protected),
        "Failed to create protected VM"
    );
    let irq_chip_vm = vm.try_clone().context("failed to clone vm")?;

    let mut irq_chip = match cfg.irq_chip.unwrap_or_default() {
        IrqChipKind::Kernel { allow_vgic_its: _ } => {
            GeniezoneKernelIrqChip::new(irq_chip_vm, components.vcpu_properties.len())
                .context("failed to create IRQ chip")?
        }
        IrqChipKind::Split => bail!("Geniezone does not support split irqchip mode"),
        IrqChipKind::Userspace => bail!("Geniezone does not support userspace irqchip mode"),
    };

    run_vm::<GeniezoneVcpu, GeniezoneVm>(
        cfg,
        components,
        &memory_layout,
        vm,
        &mut irq_chip,
        // The kernel IRQ chip never produces an IOAPIC host tube.
        None,
        #[cfg(feature = "swap")]
        swap_controller,
    )
}
1867
/// Boots the configured guest on the Halla hypervisor.
///
/// `device_path` selects the hypervisor device node; `HALLA_PATH` is used when it is `None`.
/// Only the in-kernel IRQ chip is accepted, and no IOAPIC tube is handed to `run_vm`.
///
/// # Errors
///
/// Fails if the device cannot be opened, if memory or the VM cannot be created, if a protected
/// VM was requested but the VM does not report `VmCap::Protected`, if the split or userspace
/// IRQ chip was selected, or if `run_vm` itself fails.
#[cfg(all(target_arch = "aarch64", feature = "halla"))]
fn run_halla(
    device_path: Option<&Path>,
    cfg: Config,
    components: VmComponents,
) -> Result<ExitState> {
    use devices::HallaKernelIrqChip;
    use hypervisor::halla::Halla;
    use hypervisor::halla::HallaVcpu;
    use hypervisor::halla::HallaVm;

    let halla_device = device_path.unwrap_or_else(|| Path::new(HALLA_PATH));
    let halla = Halla::new_with_path(halla_device)
        .with_context(|| format!("failed to open Halla device {}", halla_device.display()))?;

    let memory_layout =
        Arch::arch_memory_layout(&components).context("failed to create arch memory layout")?;
    let guest_memory = create_guest_memory(&cfg, &components, &memory_layout, &halla)?;

    // The swap controller, when configured, is launched before the VM object is created.
    #[cfg(feature = "swap")]
    let swap_controller = cfg
        .swap_dir
        .as_ref()
        .map(|swap_dir| {
            SwapController::launch(guest_memory.clone(), swap_dir, cfg.jail_config.as_ref())
                .context("launch vmm-swap monitor process")
        })
        .transpose()?;

    let vm =
        HallaVm::new(&halla, guest_memory, components.hv_cfg).context("failed to create vm")?;

    anyhow::ensure!(
        !cfg.protection_type.isolates_memory() || vm.check_capability(VmCap::Protected),
        "Failed to create protected VM"
    );
    let irq_chip_vm = vm.try_clone().context("failed to clone vm")?;

    let mut irq_chip = match cfg.irq_chip.unwrap_or_default() {
        IrqChipKind::Kernel { allow_vgic_its: _ } => {
            HallaKernelIrqChip::new(irq_chip_vm, components.vcpu_properties.len())
                .context("failed to create IRQ chip")?
        }
        IrqChipKind::Split => bail!("Halla does not support split irqchip mode"),
        IrqChipKind::Userspace => bail!("Halla does not support userspace irqchip mode"),
    };

    run_vm::<HallaVcpu, HallaVm>(
        cfg,
        components,
        &memory_layout,
        vm,
        &mut irq_chip,
        // The kernel IRQ chip never produces an IOAPIC host tube.
        None,
        #[cfg(feature = "swap")]
        swap_controller,
    )
}
1927
/// Boots the configured guest on KVM.
///
/// Opens the KVM device (`device_path`, or `KVM_PATH` when `None`), builds guest memory, creates
/// the VM and the IRQ chip selected by `cfg.irq_chip`, then hands everything to `run_vm`.
///
/// # Errors
///
/// Fails if the device cannot be opened, if memory, the VM or the IRQ chip cannot be created,
/// if the userspace IRQ chip is requested, if the split IRQ chip is requested on a non-x86_64
/// build, if (non-x86_64 only) a protected VM was requested but the VM does not report
/// `VmCap::Protected`, or if `run_vm` itself fails.
fn run_kvm(device_path: Option<&Path>, cfg: Config, components: VmComponents) -> Result<ExitState> {
    use devices::KvmKernelIrqChip;
    #[cfg(target_arch = "x86_64")]
    use devices::KvmSplitIrqChip;
    use hypervisor::kvm::Kvm;
    use hypervisor::kvm::KvmVcpu;
    use hypervisor::kvm::KvmVm;

    let device_path = device_path.unwrap_or(Path::new(KVM_PATH));
    let kvm = Kvm::new_with_path(device_path)
        .with_context(|| format!("failed to open KVM device {}", device_path.display()))?;

    let arch_memory_layout =
        Arch::arch_memory_layout(&components).context("failed to create arch memory layout")?;
    let guest_mem = create_guest_memory(&cfg, &components, &arch_memory_layout, &kvm)?;

    // The swap controller, when configured, is launched before the VM object is created.
    #[cfg(feature = "swap")]
    let swap_controller = if let Some(swap_dir) = cfg.swap_dir.as_ref() {
        Some(
            SwapController::launch(guest_mem.clone(), swap_dir, cfg.jail_config.as_ref())
                .context("launch vmm-swap monitor process")?,
        )
    } else {
        None
    };

    let vm = KvmVm::new(&kvm, guest_mem, components.hv_cfg).context("failed to create vm")?;

    // x86_64 only: with `itmt` set, turn off read access to the platform info MSR.
    #[cfg(target_arch = "x86_64")]
    if cfg.itmt {
        vm.set_platform_info_read_access(false)
            .context("failed to disable MSR_PLATFORM_INFO read access")?;
    }

    // Non-x86_64 only: refuse to continue when memory isolation was requested but the VM does
    // not report `VmCap::Protected`.
    #[cfg(not(target_arch = "x86_64"))]
    if cfg.protection_type.isolates_memory() && !vm.check_capability(VmCap::Protected) {
        bail!("Failed to create protected VM");
    }
    let vm_clone = vm.try_clone().context("failed to clone vm")?;

    // Owns whichever concrete IRQ chip was built so it can be lent out as a trait object.
    enum KvmIrqChip {
        #[cfg(target_arch = "x86_64")]
        Split(KvmSplitIrqChip),
        Kernel(KvmKernelIrqChip),
    }

    impl KvmIrqChip {
        // Borrows the wrapped chip as `&mut dyn IrqChipArch`, the type `run_vm` takes.
        fn as_mut(&mut self) -> &mut dyn IrqChipArch {
            match self {
                #[cfg(target_arch = "x86_64")]
                KvmIrqChip::Split(i) => i,
                KvmIrqChip::Kernel(i) => i,
            }
        }
    }

    // Initialised exactly once, in whichever match arm does not bail out; only the split chip
    // yields a tube.
    let ioapic_host_tube;
    let mut irq_chip = match cfg.irq_chip.unwrap_or_default() {
        IrqChipKind::Userspace => {
            bail!("KVM userspace irqchip mode not implemented");
        }
        IrqChipKind::Split => {
            #[cfg(not(target_arch = "x86_64"))]
            bail!("KVM split irqchip mode only supported on x86 processors");
            #[cfg(target_arch = "x86_64")]
            {
                let (host_tube, ioapic_device_tube) =
                    Tube::pair().context("failed to create tube")?;
                ioapic_host_tube = Some(host_tube);
                KvmIrqChip::Split(
                    KvmSplitIrqChip::new(
                        vm_clone,
                        components.vcpu_properties.len(),
                        ioapic_device_tube,
                        // NOTE(review): `24` is unexplained here — presumably the IOAPIC pin
                        // count; confirm against `KvmSplitIrqChip::new`.
                        Some(24),
                    )
                    .context("failed to create IRQ chip")?,
                )
            }
        }
        IrqChipKind::Kernel {
            #[cfg(target_arch = "aarch64")]
            allow_vgic_its,
        } => {
            ioapic_host_tube = None;
            KvmIrqChip::Kernel(
                KvmKernelIrqChip::new(
                    vm_clone,
                    components.vcpu_properties.len(),
                    #[cfg(target_arch = "aarch64")]
                    allow_vgic_its,
                )
                .context("failed to create IRQ chip")?,
            )
        }
    };

    run_vm::<KvmVcpu, KvmVm>(
        cfg,
        components,
        &arch_memory_layout,
        vm,
        irq_chip.as_mut(),
        ioapic_host_tube,
        #[cfg(feature = "swap")]
        swap_controller,
    )
}
2039
/// Boots the configured guest on the Gunyah hypervisor.
///
/// `device_path` selects the hypervisor device node; `GUNYAH_PATH` is used when it is `None`.
/// `qcom_trusted_vm_id` and `qcom_trusted_vm_pas_id` are passed through unchanged to
/// `GunyahVm::new`. No IOAPIC tube is handed to `run_vm`.
///
/// # Errors
///
/// Fails if the device cannot be opened, if memory, the VM or the IRQ chip cannot be created,
/// if a protected VM was requested but the VM does not report `VmCap::Protected`, or if
/// `run_vm` itself fails.
#[cfg(all(target_arch = "aarch64", feature = "gunyah"))]
fn run_gunyah(
    device_path: Option<&Path>,
    qcom_trusted_vm_id: Option<u16>,
    qcom_trusted_vm_pas_id: Option<u32>,
    cfg: Config,
    components: VmComponents,
) -> Result<ExitState> {
    use devices::GunyahIrqChip;
    use hypervisor::gunyah::Gunyah;
    use hypervisor::gunyah::GunyahVcpu;
    use hypervisor::gunyah::GunyahVm;

    let device_path = device_path.unwrap_or(Path::new(GUNYAH_PATH));
    let gunyah = Gunyah::new_with_path(device_path)
        .with_context(|| format!("failed to open Gunyah device {}", device_path.display()))?;

    let arch_memory_layout =
        Arch::arch_memory_layout(&components).context("failed to create arch memory layout")?;
    let guest_mem = create_guest_memory(&cfg, &components, &arch_memory_layout, &gunyah)?;

    // The swap controller, when configured, is launched before the VM object is created.
    #[cfg(feature = "swap")]
    let swap_controller = if let Some(swap_dir) = cfg.swap_dir.as_ref() {
        Some(
            SwapController::launch(guest_mem.clone(), swap_dir, cfg.jail_config.as_ref())
                .context("launch vmm-swap monitor process")?,
        )
    } else {
        None
    };

    let vm = GunyahVm::new(
        &gunyah,
        qcom_trusted_vm_id,
        qcom_trusted_vm_pas_id,
        guest_mem,
        components.hv_cfg,
    )
    .context("failed to create vm")?;

    if cfg.protection_type.isolates_memory() && !vm.check_capability(VmCap::Protected) {
        bail!("Failed to create protected VM");
    }

    // Attach the same error context the other `run_*` functions use, so a failure here is as
    // easy to attribute as it is for the other hypervisors.
    let vm_clone = vm.try_clone().context("failed to clone vm")?;
    let mut irq_chip = GunyahIrqChip::new(vm_clone).context("failed to create IRQ chip")?;

    run_vm::<GunyahVcpu, GunyahVm>(
        cfg,
        components,
        &arch_memory_layout,
        vm,
        &mut irq_chip,
        None,
        #[cfg(feature = "swap")]
        swap_controller,
    )
}
2098
2099fn get_default_hypervisor() -> Option<HypervisorKind> {
2101 let kvm_path = Path::new(KVM_PATH);
2102 if kvm_path.exists() {
2103 return Some(HypervisorKind::Kvm {
2104 device: Some(kvm_path.to_path_buf()),
2105 });
2106 }
2107
2108 #[cfg(all(target_arch = "aarch64", feature = "geniezone"))]
2109 {
2110 let gz_path = Path::new(GENIEZONE_PATH);
2111 if gz_path.exists() {
2112 return Some(HypervisorKind::Geniezone {
2113 device: Some(gz_path.to_path_buf()),
2114 });
2115 }
2116 }
2117
2118 #[cfg(target_arch = "aarch64")]
2119 #[cfg(feature = "halla")]
2120 {
2121 let halla_path = Path::new(HALLA_PATH);
2122 if halla_path.exists() {
2123 return Some(HypervisorKind::Halla {
2124 device: Some(halla_path.to_path_buf()),
2125 });
2126 }
2127 }
2128
2129 #[cfg(all(unix, target_arch = "aarch64", feature = "gunyah"))]
2130 {
2131 let gunyah_path = Path::new(GUNYAH_PATH);
2132 if gunyah_path.exists() {
2133 return Some(HypervisorKind::Gunyah {
2134 device: Some(gunyah_path.to_path_buf()),
2135 qcom_trusted_vm_id: None,
2136 qcom_trusted_vm_pas_id: None,
2137 });
2138 }
2139 }
2140
2141 None
2142}
2143
2144pub fn run_config(cfg: Config) -> Result<ExitState> {
2145 let components = setup_vm_components(&cfg)?;
2146
2147 let hypervisor = cfg
2148 .hypervisor
2149 .clone()
2150 .or_else(get_default_hypervisor)
2151 .context("no enabled hypervisor")?;
2152
2153 debug!("creating hypervisor: {:?}", hypervisor);
2154
2155 match hypervisor {
2156 HypervisorKind::Kvm { device } => run_kvm(device.as_deref(), cfg, components),
2157 #[cfg(all(target_arch = "aarch64", feature = "geniezone"))]
2158 HypervisorKind::Geniezone { device } => run_gz(device.as_deref(), cfg, components),
2159 #[cfg(target_arch = "aarch64")]
2160 #[cfg(feature = "halla")]
2161 HypervisorKind::Halla { device } => run_halla(device.as_deref(), cfg, components),
2162 #[cfg(all(unix, target_arch = "aarch64", feature = "gunyah"))]
2163 HypervisorKind::Gunyah {
2164 device,
2165 qcom_trusted_vm_id,
2166 qcom_trusted_vm_pas_id,
2167 } => run_gunyah(
2168 device.as_deref(),
2169 qcom_trusted_vm_id,
2170 qcom_trusted_vm_pas_id,
2171 cfg,
2172 components,
2173 ),
2174 }
2175}
2176
2177fn run_vm<Vcpu, V>(
2178 cfg: Config,
2179 #[allow(unused_mut)] mut components: VmComponents,
2180 arch_memory_layout: &<Arch as LinuxArch>::ArchMemoryLayout,
2181 mut vm: V,
2182 irq_chip: &mut dyn IrqChipArch,
2183 ioapic_host_tube: Option<Tube>,
2184 #[cfg(feature = "swap")] mut swap_controller: Option<SwapController>,
2185) -> Result<ExitState>
2186where
2187 Vcpu: VcpuArch + 'static,
2188 V: VmArch + 'static,
2189{
2190 if cfg.jail_config.is_some() {
2191 info!("crosvm entering multiprocess mode");
2195 }
2196
2197 let (metrics_send, metrics_recv) = Tube::directional_pair().context("metrics tube")?;
2198 metrics::initialize(metrics_send);
2199
2200 #[cfg(all(feature = "pci-hotplug", feature = "swap"))]
2201 let swap_device_helper = match &swap_controller {
2202 Some(swap_controller) => Some(swap_controller.create_device_helper()?),
2203 None => None,
2204 };
2205 #[cfg(all(feature = "pci-hotplug", not(target_arch = "x86_64")))]
2208 if cfg.pci_hotplug_slots.is_some() {
2209 bail!("pci-hotplug is not implemented for non x86_64 architecture");
2210 }
2211 #[cfg(feature = "pci-hotplug")]
2213 #[allow(unused_mut)]
2215 let mut hotplug_manager = if cfg.pci_hotplug_slots.is_some() {
2216 Some(PciHotPlugManager::new(
2217 vm.get_memory().clone(),
2218 &cfg,
2219 #[cfg(feature = "swap")]
2220 swap_device_helper,
2221 )?)
2222 } else {
2223 None
2224 };
2225
2226 #[cfg(feature = "usb")]
2227 let (usb_control_tube, usb_provider) =
2228 DeviceProvider::new().context("failed to create usb provider")?;
2229
2230 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
2234
2235 let control_server_socket = match &cfg.socket_path {
2236 Some(path) => Some(UnlinkUnixSeqpacketListener(
2237 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
2238 )),
2239 None => None,
2240 };
2241
2242 let mut all_control_tubes = Vec::new();
2243 let mut add_control_tube = |t| all_control_tubes.push(t);
2244
2245 if let Some(ioapic_host_tube) = ioapic_host_tube {
2246 add_control_tube(AnyControlTube::IrqTube(ioapic_host_tube));
2247 }
2248
2249 let battery = if cfg.battery_config.is_some() {
2250 #[cfg_attr(
2251 not(feature = "power-monitor-powerd"),
2252 allow(clippy::manual_map, clippy::needless_match, unused_mut)
2253 )]
2254 let jail = if let Some(jail_config) = cfg.jail_config.as_ref() {
2255 let mut config = SandboxConfig::new(jail_config, "battery");
2256 #[cfg(feature = "power-monitor-powerd")]
2257 {
2258 config.bind_mounts = true;
2259 }
2260 let mut jail =
2261 create_sandbox_minijail(&jail_config.pivot_root, MAX_OPEN_FILES_DEFAULT, &config)?;
2262
2263 #[cfg(feature = "power-monitor-powerd")]
2265 {
2266 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2267 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2268 }
2269 Some(jail)
2270 } else {
2271 None
2272 };
2273 (cfg.battery_config.as_ref().map(|c| c.type_), jail)
2274 } else {
2275 (cfg.battery_config.as_ref().map(|c| c.type_), None)
2276 };
2277
2278 let (vm_evt_wrtube, vm_evt_rdtube) =
2279 Tube::directional_pair().context("failed to create vm event tube")?;
2280
2281 let pstore_size = components.pstore.as_ref().map(|pstore| pstore.size as u64);
2282 let mut sys_allocator = SystemAllocator::new(
2283 Arch::get_system_allocator_config(&vm, arch_memory_layout),
2284 pstore_size,
2285 &cfg.mmio_address_ranges,
2286 )
2287 .context("failed to create system allocator")?;
2288
2289 let ramoops_region = match &components.pstore {
2290 Some(pstore) => Some(
2291 arch::pstore::create_memory_region(
2292 &mut vm,
2293 sys_allocator.reserved_region().unwrap(),
2294 pstore,
2295 )
2296 .context("failed to allocate pstore region")?,
2297 ),
2298 None => None,
2299 };
2300
2301 create_mmio_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
2302
2303 #[cfg(feature = "gpu")]
2304 let (_render_server_jail, render_server_fd) =
2306 if let Some(parameters) = &cfg.gpu_render_server_parameters {
2307 let (jail, fd) = start_gpu_render_server(&cfg, parameters)?;
2308 (Some(ScopedMinijail(jail)), Some(fd))
2309 } else {
2310 (None, None)
2311 };
2312
2313 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> =
2314 BTreeMap::new();
2315 let mut iova_max_addr: Option<u64> = None;
2316
2317 let mut vfio_container_manager = VfioContainerManager::new();
2318
2319 #[cfg(feature = "registered_events")]
2320 let (reg_evt_wrtube, reg_evt_rdtube) =
2321 Tube::directional_pair().context("failed to create registered event tube")?;
2322
2323 let mut worker_process_pids = BTreeSet::new();
2324
2325 let mut devices = create_devices(
2326 &cfg,
2327 &mut vm,
2328 &mut sys_allocator,
2329 &mut add_control_tube,
2330 &vm_evt_wrtube,
2331 &mut iommu_attached_endpoints,
2332 #[cfg(feature = "usb")]
2333 usb_provider,
2334 #[cfg(feature = "gpu")]
2335 render_server_fd,
2336 &mut iova_max_addr,
2337 #[cfg(feature = "registered_events")]
2338 ®_evt_wrtube,
2339 &mut vfio_container_manager,
2340 &mut worker_process_pids,
2341 )?;
2342
2343 #[cfg(feature = "pci-hotplug")]
2344 #[allow(unused_variables)]
2346 let pci_hotplug_slots = cfg.pci_hotplug_slots;
2347 #[cfg(not(feature = "pci-hotplug"))]
2348 #[allow(unused_variables)]
2349 let pci_hotplug_slots: Option<u8> = None;
2350 #[cfg(target_arch = "x86_64")]
2351 let hp_stub = create_pure_virtual_pcie_root_port(
2352 &mut sys_allocator,
2353 &mut add_control_tube,
2354 &mut devices,
2355 pci_hotplug_slots.unwrap_or(1),
2356 )?;
2357
2358 arch::assign_pci_addresses(&mut devices, &mut sys_allocator)?;
2359
2360 let pci_devices: Vec<&dyn PciDevice> = devices
2361 .iter()
2362 .filter_map(|d| (d.0).as_pci_device())
2363 .collect();
2364
2365 let virtio_devices: Vec<(&dyn VirtioDevice, devices::PciAddress)> = pci_devices
2366 .into_iter()
2367 .flat_map(|s| {
2368 if let Some(virtio_pci_device) = s.as_virtio_pci_device() {
2369 std::iter::zip(
2370 Some(virtio_pci_device.virtio_device()),
2371 virtio_pci_device.pci_address(),
2372 )
2373 .next()
2374 } else {
2375 None
2376 }
2377 })
2378 .collect();
2379
2380 let mut open_firmware_device_paths: Vec<(Vec<u8>, usize)> = virtio_devices
2381 .iter()
2382 .flat_map(|s| (s.0).bootorder_fw_cfg(s.1.dev))
2383 .collect();
2384
2385 open_firmware_device_paths.sort_by(|a, b| (a.1).cmp(&(b.1)));
2387
2388 let mut bootorder_fw_cfg_blob =
2390 open_firmware_device_paths
2391 .into_iter()
2392 .fold(Vec::new(), |a, b| {
2393 a.into_iter()
2394 .chain("/pci@i0cf8/".as_bytes().iter().copied())
2395 .chain(b.0)
2396 .chain("\n".as_bytes().iter().copied())
2397 .collect()
2398 });
2399
2400 bootorder_fw_cfg_blob.push(0);
2402
2403 components.bootorder_fw_cfg_blob = bootorder_fw_cfg_blob;
2404
2405 components.fw_cfg_enable |= components.bootorder_fw_cfg_blob.len() > 1;
2408
2409 let (translate_response_senders, request_rx) = setup_virtio_access_platform(
2410 &mut sys_allocator,
2411 &mut iommu_attached_endpoints,
2412 &mut devices,
2413 )?;
2414
2415 #[cfg(target_arch = "x86_64")]
2416 let iommu_bus_ranges = hp_stub.iommu_bus_ranges;
2417 #[cfg(not(target_arch = "x86_64"))]
2418 let iommu_bus_ranges = Vec::new();
2419
2420 let iommu_host_tube = if !iommu_attached_endpoints.is_empty()
2421 || (cfg.vfio_isolate_hotplug && !iommu_bus_ranges.is_empty())
2422 {
2423 let (iommu_host_tube, iommu_device_tube) = Tube::pair().context("failed to create tube")?;
2424 let iommu_dev = create_iommu_device(
2425 cfg.protection_type,
2426 cfg.jail_config.as_ref(),
2427 iova_max_addr.unwrap_or(u64::MAX),
2428 iommu_attached_endpoints,
2429 iommu_bus_ranges,
2430 translate_response_senders,
2431 request_rx,
2432 iommu_device_tube,
2433 )?;
2434
2435 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2436 add_control_tube(AnyControlTube::IrqTube(msi_host_tube));
2437 let (ioevent_host_tube, ioevent_device_tube) =
2438 Tube::pair().context("failed to create ioevent tube")?;
2439 add_control_tube(
2440 VmMemoryTube {
2441 tube: ioevent_host_tube,
2442 expose_with_viommu: false,
2443 }
2444 .into(),
2445 );
2446 let (host_tube, device_tube) =
2447 Tube::pair().context("failed to create device control tube")?;
2448 add_control_tube(TaggedControlTube::Vm(host_tube).into());
2449 let mut dev = VirtioPciDevice::new(
2450 vm.get_memory().clone(),
2451 iommu_dev.dev,
2452 msi_device_tube,
2453 cfg.disable_virtio_intx,
2454 None,
2455 VmMemoryClient::new(ioevent_device_tube),
2456 device_tube,
2457 )
2458 .context("failed to create virtio pci dev")?;
2459 dev.allocate_address(&mut sys_allocator)
2461 .context("failed to allocate resources early for virtio pci dev")?;
2462 let dev = Box::new(dev);
2463 devices.push((dev, iommu_dev.jail));
2464 Some(iommu_host_tube)
2465 } else {
2466 None
2467 };
2468
2469 #[cfg(target_arch = "x86_64")]
2470 for device in devices
2471 .iter_mut()
2472 .filter_map(|(dev, _)| dev.as_pci_device_mut())
2473 {
2474 device
2475 .generate_acpi(&mut components.acpi_sdts)
2476 .with_context(|| format!("generate_acpi failed for {}", device.debug_label()))?;
2477 }
2478
2479 let mut vcpu_ids = Vec::new();
2481
2482 let guest_suspended_cvar = if cfg.force_s2idle {
2483 Some(Arc::new((Mutex::new(false), Condvar::new())))
2484 } else {
2485 None
2486 };
2487
2488 let dt_overlays = cfg
2489 .device_tree_overlay
2490 .iter()
2491 .map(|o| {
2492 Ok(DtbOverlay {
2493 file: open_file_or_duplicate(o.path.as_path(), OpenOptions::new().read(true))
2494 .with_context(|| {
2495 format!("failed to open device tree overlay {}", o.path.display())
2496 })?,
2497 do_filter: o.filter_devs,
2498 })
2499 })
2500 .collect::<Result<Vec<DtbOverlay>>>()?;
2501
2502 #[cfg(target_arch = "aarch64")]
2503 let vcpu_domain_paths: BTreeMap<usize, PathBuf> = components
2504 .vcpu_properties
2505 .iter()
2506 .filter_map(|(id, props)| {
2507 props
2508 .vcpu_domain_path
2509 .as_ref()
2510 .map(|path| (*id, path.clone()))
2511 })
2512 .collect();
2513
2514 let mut linux = Arch::build_vm::<V, Vcpu>(
2515 components,
2516 arch_memory_layout,
2517 &vm_evt_wrtube,
2518 &mut sys_allocator,
2519 &cfg.serial_parameters,
2520 simple_jail(cfg.jail_config.as_ref(), "serial_device")?,
2521 battery,
2522 vm,
2523 ramoops_region,
2524 devices,
2525 irq_chip,
2526 &mut vcpu_ids,
2527 cfg.dump_device_tree_blob.clone(),
2528 simple_jail(cfg.jail_config.as_ref(), "serial_device")?,
2529 #[cfg(target_arch = "x86_64")]
2530 simple_jail(cfg.jail_config.as_ref(), "block_device")?,
2531 #[cfg(target_arch = "x86_64")]
2532 simple_jail(cfg.jail_config.as_ref(), "fw_cfg_device")?,
2533 #[cfg(feature = "swap")]
2534 &mut swap_controller,
2535 guest_suspended_cvar.clone(),
2536 dt_overlays,
2537 cfg.fdt_position,
2538 cfg.no_pmu,
2539 )
2540 .context("the architecture failed to build the vm")?;
2541
2542 for tube in linux.vm_request_tubes.drain(..) {
2543 add_control_tube(TaggedControlTube::Vm(tube).into());
2544 }
2545
2546 #[cfg(target_arch = "x86_64")]
2547 let (hp_control_tube, hp_worker_tube) = mpsc::channel();
2548 #[cfg(all(feature = "pci-hotplug", target_arch = "x86_64"))]
2549 if let Some(hotplug_manager) = &mut hotplug_manager {
2550 hotplug_manager.set_rootbus_controller(hp_control_tube.clone())?;
2551 }
2552 #[cfg(target_arch = "x86_64")]
2553 let hp_thread = {
2554 for (bus_num, hp_bus) in hp_stub.hotplug_buses.into_iter() {
2555 #[cfg(feature = "pci-hotplug")]
2556 if let Some(hotplug_manager) = &mut hotplug_manager {
2557 hotplug_manager.add_port(hp_bus)?;
2558 } else {
2559 linux.hotplug_bus.insert(bus_num, hp_bus);
2560 }
2561 #[cfg(not(feature = "pci-hotplug"))]
2562 linux.hotplug_bus.insert(bus_num, hp_bus);
2563 }
2564
2565 if let Some(pm) = &linux.pm {
2566 for (gpe, notify_dev) in hp_stub.gpe_notify_devs.into_iter() {
2567 pm.lock().register_gpe_notify_dev(gpe, notify_dev);
2568 }
2569 for (bus, notify_dev) in hp_stub.pme_notify_devs.into_iter() {
2570 pm.lock().register_pme_notify_dev(bus, notify_dev);
2571 }
2572 }
2573
2574 let (hp_vm_mem_host_tube, hp_vm_mem_worker_tube) =
2575 Tube::pair().context("failed to create tube")?;
2576 add_control_tube(
2577 VmMemoryTube {
2578 tube: hp_vm_mem_host_tube,
2579 expose_with_viommu: false,
2580 }
2581 .into(),
2582 );
2583
2584 let supports_readonly_mapping = linux.vm.supports_readonly_mapping();
2585 let pci_root = linux.root_config.clone();
2586 std::thread::Builder::new()
2587 .name("pci_root".to_string())
2588 .spawn(move || {
2589 start_pci_root_worker(
2590 supports_readonly_mapping,
2591 pci_root,
2592 hp_worker_tube,
2593 hp_vm_mem_worker_tube,
2594 )
2595 })?
2596 };
2597
2598 let flags = RutabagaGrallocBackendFlags::new().disable_vulkano();
2599 let gralloc = RutabagaGralloc::new(flags).context("failed to create gralloc")?;
2600
2601 run_control(
2602 linux,
2603 sys_allocator,
2604 cfg,
2605 control_server_socket,
2606 all_control_tubes,
2607 #[cfg(feature = "usb")]
2608 usb_control_tube,
2609 vm_evt_rdtube,
2610 vm_evt_wrtube,
2611 sigchld_fd,
2612 gralloc,
2613 vcpu_ids,
2614 iommu_host_tube,
2615 #[cfg(target_arch = "x86_64")]
2616 hp_control_tube,
2617 #[cfg(target_arch = "x86_64")]
2618 hp_thread,
2619 #[cfg(feature = "pci-hotplug")]
2620 hotplug_manager,
2621 #[cfg(feature = "swap")]
2622 swap_controller,
2623 #[cfg(feature = "registered_events")]
2624 reg_evt_rdtube,
2625 guest_suspended_cvar,
2626 metrics_recv,
2627 vfio_container_manager,
2628 worker_process_pids,
2629 #[cfg(target_arch = "aarch64")]
2630 vcpu_domain_paths,
2631 )
2632}
2633
2634#[cfg(target_arch = "x86_64")]
2643fn start_pci_root_worker(
2644 supports_readonly_mapping: bool,
2645 pci_root: Arc<Mutex<PciRoot>>,
2646 hp_device_tube: mpsc::Receiver<PciRootCommand>,
2647 vm_control_tube: Tube,
2648) {
2649 struct PciMmioMapperTube {
2650 supports_readonly_mapping: bool,
2651 vm_control_tube: Tube,
2652 registered_regions: BTreeMap<u32, VmMemoryRegionId>,
2653 next_id: u32,
2654 }
2655
2656 impl PciMmioMapper for PciMmioMapperTube {
2657 fn supports_readonly_mapping(&self) -> bool {
2658 self.supports_readonly_mapping
2659 }
2660
2661 fn add_mapping(&mut self, addr: GuestAddress, shmem: &SharedMemory) -> anyhow::Result<u32> {
2662 let shmem = shmem
2663 .try_clone()
2664 .context("failed to create new SharedMemory")?;
2665 self.vm_control_tube
2666 .send(&VmMemoryRequest::RegisterMemory {
2667 source: VmMemorySource::SharedMemory(shmem),
2668 dest: VmMemoryDestination::GuestPhysicalAddress(addr.0),
2669 prot: Protection::read(),
2670 cache: MemCacheType::CacheCoherent,
2671 })
2672 .context("failed to send request")?;
2673 match self.vm_control_tube.recv::<VmMemoryResponse>() {
2674 Ok(VmMemoryResponse::RegisterMemory { region_id, .. }) => {
2675 let cur_id = self.next_id;
2676 self.registered_regions.insert(cur_id, region_id);
2677 self.next_id += 1;
2678 Ok(cur_id)
2679 }
2680 res => bail!("Bad response: {:?}", res),
2681 }
2682 }
2683 }
2684
2685 let mut mapper = PciMmioMapperTube {
2686 supports_readonly_mapping,
2687 vm_control_tube,
2688 registered_regions: BTreeMap::new(),
2689 next_id: 0,
2690 };
2691
2692 loop {
2693 match hp_device_tube.recv() {
2694 Ok(cmd) => match cmd {
2695 PciRootCommand::Add(addr, device) => {
2696 if let Err(e) = pci_root.lock().add_device(addr, device, &mut mapper) {
2697 error!("failed to add hotplugged device to PCI root port: {}", e);
2698 }
2699 }
2700 PciRootCommand::AddBridge(pci_bus) => {
2701 if let Err(e) = pci_root.lock().add_bridge(pci_bus) {
2702 error!("failed to add hotplugged bridge to PCI root port: {}", e);
2703 }
2704 }
2705 PciRootCommand::Remove(addr) => {
2706 pci_root.lock().remove_device(addr);
2707 }
2708 PciRootCommand::Kill => break,
2709 },
2710 Err(e) => {
2711 error!("Error: pci root worker channel closed: {}", e);
2712 break;
2713 }
2714 }
2715 }
2716}
2717
2718#[cfg(target_arch = "x86_64")]
2719fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
2720 linux: &RunnableLinuxVm<V, Vcpu>,
2721 host_addr: PciAddress,
2722) -> Result<Arc<Mutex<dyn HotPlugBus>>> {
2723 for (_, hp_bus) in linux.hotplug_bus.iter() {
2724 if hp_bus.lock().is_match(host_addr).is_some() {
2725 return Ok(hp_bus.clone());
2726 }
2727 }
2728 Err(anyhow!("Failed to find a suitable hotplug bus"))
2729}
2730
/// Hot-adds the host PCI device described by `device` to the running VM.
///
/// The host PCI address is parsed from `device.path` and used to pick a hotplug bus via
/// `get_hp_bus`. Depending on `device.device_type` either a PCIe bridge (upstream/downstream
/// port) or a VFIO PCI endpoint is created and registered with `Arch::register_pci_device`.
/// For endpoints, when `iommu_host_tube` is `Some`, a `VfioDeviceAdd` command is sent over it.
/// Finally the device is recorded on the hotplug bus and, if `device.hp_interrupt` is set,
/// `hot_plug` is called on the bus.
///
/// Control tubes created along the way are handed to `add_control_tube`.
///
/// Returns an error if any of the steps above fails; tubes already passed to
/// `add_control_tube` and buses already inserted into `linux.hotplug_bus` are not rolled back.
#[cfg(target_arch = "x86_64")]
fn add_hotplug_device<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator: &mut SystemAllocator,
    cfg: &Config,
    add_control_tube: &mut impl FnMut(AnyControlTube),
    hp_control_tube: &mpsc::Sender<PciRootCommand>,
    iommu_host_tube: Option<&Tube>,
    device: &HotPlugDeviceInfo,
    #[cfg(feature = "swap")] swap_controller: &mut Option<SwapController>,
    vfio_container_manager: &mut VfioContainerManager,
) -> Result<()> {
    let host_addr = PciAddress::from_path(&device.path)
        .context("failed to parse hotplug device's PCI address")?;
    // The bus is looked up before anything is created, so an unmatched address fails early.
    let hp_bus = get_hp_bus(linux, host_addr)?;

    let (hotplug_key, pci_address) = match device.device_type {
        // Host PCIe ports are exposed to the guest as an emulated PCI bridge.
        HotPlugDeviceType::UpstreamPort | HotPlugDeviceType::DownstreamPort => {
            let (vm_host_tube, vm_device_tube) = Tube::pair().context("failed to create tube")?;
            add_control_tube(TaggedControlTube::Vm(vm_host_tube).into());
            let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
            add_control_tube(AnyControlTube::IrqTube(msi_host_tube));
            let pcie_host = PcieHostPort::new(device.path.as_path(), vm_device_tube)?;
            let (hotplug_key, pci_bridge) = match device.device_type {
                HotPlugDeviceType::UpstreamPort => {
                    let hotplug_key = HotPlugKey::HostUpstreamPort { host_addr };
                    let pcie_upstream_port = Arc::new(Mutex::new(PcieUpstreamPort::new_from_host(
                        pcie_host, true,
                    )?));
                    let pci_bridge =
                        Box::new(PciBridge::new(pcie_upstream_port.clone(), msi_device_tube));
                    // The new port itself becomes a hotplug bus, keyed by the bridge's
                    // secondary bus number.
                    linux
                        .hotplug_bus
                        .insert(pci_bridge.get_secondary_num(), pcie_upstream_port);
                    (hotplug_key, pci_bridge)
                }
                HotPlugDeviceType::DownstreamPort => {
                    let hotplug_key = HotPlugKey::HostDownstreamPort { host_addr };
                    let pcie_downstream_port = Arc::new(Mutex::new(
                        PcieDownstreamPort::new_from_host(pcie_host, true)?,
                    ));
                    let pci_bridge = Box::new(PciBridge::new(
                        pcie_downstream_port.clone(),
                        msi_device_tube,
                    ));
                    // Same as above: register the downstream port as a hotplug bus.
                    linux
                        .hotplug_bus
                        .insert(pci_bridge.get_secondary_num(), pcie_downstream_port);
                    (hotplug_key, pci_bridge)
                }
                // The outer arm only admits the two port types handled above.
                _ => {
                    bail!("Impossible to reach here")
                }
            };
            let pci_address = Arch::register_pci_device(
                linux,
                pci_bridge,
                None,
                sys_allocator,
                hp_control_tube,
                #[cfg(feature = "swap")]
                swap_controller,
            )?;

            (hotplug_key, pci_address)
        }
        // Endpoints are passed through as VFIO PCI devices.
        HotPlugDeviceType::EndPoint => {
            let hotplug_key = HotPlugKey::HostVfio { host_addr };
            let (vfio_device, jail, viommu_mapper) = create_vfio_device(
                cfg.jail_config.as_ref(),
                &linux.vm,
                sys_allocator,
                add_control_tube,
                &device.path,
                true,
                None,
                None,
                None,
                // The IOMMU type follows whether the caller supplied an IOMMU tube.
                if iommu_host_tube.is_some() {
                    IommuDevType::VirtioIommu
                } else {
                    IommuDevType::NoIommu
                },
                None,
                vfio_container_manager,
            )?;
            let vfio_pci_device = match vfio_device {
                VfioDeviceVariant::Pci(pci) => Box::new(pci),
                VfioDeviceVariant::Platform(_) => bail!("vfio platform hotplug not supported"),
            };
            let pci_address = Arch::register_pci_device(
                linux,
                vfio_pci_device,
                jail,
                sys_allocator,
                hp_control_tube,
                #[cfg(feature = "swap")]
                swap_controller,
            )?;
            if let Some(iommu_host_tube) = iommu_host_tube {
                let endpoint_addr = pci_address.to_u32();
                let vfio_wrapper = viommu_mapper.context("expected mapper")?;
                let descriptor = vfio_wrapper.clone_as_raw_descriptor()?;
                let request =
                    VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceAdd {
                        endpoint_addr,
                        wrapper_id: vfio_wrapper.id(),
                        container: {
                            // SAFETY: `descriptor` was obtained from
                            // `clone_as_raw_descriptor()` just above and is used nowhere else,
                            // so the `File` becomes its only owner.
                            // NOTE(review): assumes the clone is a freshly duplicated
                            // descriptor — confirm against `clone_as_raw_descriptor`.
                            unsafe { File::from_raw_descriptor(descriptor) }
                        },
                    });
                // The underlying request error is discarded and reported as SocketFailed.
                match virtio_iommu_request(iommu_host_tube, &request)
                    .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
                {
                    VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
                    resp => bail!("Unexpected message response: {:?}", resp),
                }
            }

            (hotplug_key, pci_address)
        }
    };
    hp_bus.lock().add_hotplug_device(hotplug_key, pci_address);
    if device.hp_interrupt {
        hp_bus.lock().hot_plug(pci_address)?;
    }
    Ok(())
}
2861
/// Hotplugs a virtio-net device described by `net_param` through `hotplug_manager`.
///
/// Three tube pairs are created (IRQ, VM memory, VM control); each host end is passed to
/// `add_control_tube` and each device end is packed into a [`NetResourceCarrier`].
///
/// Returns whatever `PciHotPlugManager::hotplug_device` returns; the caller in this file
/// reports the `u8` as the PCI bus of the new device.
#[cfg(feature = "pci-hotplug")]
fn add_hotplug_net<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator: &mut SystemAllocator,
    add_control_tube: &mut impl FnMut(AnyControlTube),
    hotplug_manager: &mut PciHotPlugManager,
    net_param: NetParameters,
) -> Result<u8> {
    // IRQ tube.
    let (irq_host_end, irq_device_end) = Tube::pair().context("create tube")?;
    add_control_tube(AnyControlTube::IrqTube(irq_host_end));

    // VM memory tube.
    let (mem_host_end, mem_device_end) = Tube::pair().context("create tube")?;
    let mem_client = VmMemoryClient::new(mem_device_end);
    let mem_control_tube = VmMemoryTube {
        tube: mem_host_end,
        expose_with_viommu: false,
    };
    add_control_tube(mem_control_tube.into());

    // VM control tube.
    let (ctrl_host_end, ctrl_device_end) = Tube::pair().context("create tube")?;
    add_control_tube(TaggedControlTube::Vm(ctrl_host_end).into());

    let carrier = NetResourceCarrier::new(net_param, irq_device_end, mem_client, ctrl_device_end);
    let resources = vec![ResourceCarrier::VirtioNet(carrier)];
    hotplug_manager.hotplug_device(resources, linux, sys_allocator)
}
2895
/// Dispatches a [`NetControlCommand`] to the matching add/remove handler and returns that
/// handler's [`VmResponse`].
#[cfg(feature = "pci-hotplug")]
fn handle_hotplug_net_command<V: VmArch, Vcpu: VcpuArch>(
    net_cmd: NetControlCommand,
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator: &mut SystemAllocator,
    add_control_tube: &mut impl FnMut(AnyControlTube),
    hotplug_manager: &mut PciHotPlugManager,
) -> VmResponse {
    match net_cmd {
        NetControlCommand::AddTap(tap) => {
            handle_hotplug_net_add(linux, sys_allocator, add_control_tube, hotplug_manager, &tap)
        }
        NetControlCommand::RemoveTap(bus_num) => {
            handle_hotplug_net_remove(linux, sys_allocator, hotplug_manager, bus_num)
        }
    }
}
2917
/// Hotplugs a net device backed by the tap interface named `tap_name`.
///
/// All other [`NetParameters`] fields are left at `None` / `false`. On success the response
/// carries the value returned by `add_hotplug_net` as `bus`; on failure the error is rendered
/// with `{:?}` into `VmResponse::ErrString`.
#[cfg(feature = "pci-hotplug")]
fn handle_hotplug_net_add<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator: &mut SystemAllocator,
    add_control_tube: &mut impl FnMut(AnyControlTube),
    hotplug_manager: &mut PciHotPlugManager,
    tap_name: &str,
) -> VmResponse {
    let net_param = NetParameters {
        mode: NetParametersMode::TapName {
            tap_name: String::from(tap_name),
            mac: None,
        },
        vhost_net: None,
        vq_pairs: None,
        packed_queue: false,
        pci_address: None,
        mrg_rxbuf: false,
    };

    match add_hotplug_net(
        linux,
        sys_allocator,
        add_control_tube,
        hotplug_manager,
        net_param,
    ) {
        Ok(bus) => VmResponse::PciHotPlugResponse { bus },
        Err(e) => VmResponse::ErrString(format!("{e:?}")),
    }
}
2951
/// Removes the hotplugged device on `bus` via `hotplug_manager`.
///
/// Returns `VmResponse::Ok` on success, otherwise `VmResponse::ErrString` holding the `{:?}`
/// rendering of the error.
#[cfg(feature = "pci-hotplug")]
fn handle_hotplug_net_remove<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator: &mut SystemAllocator,
    hotplug_manager: &mut PciHotPlugManager,
    bus: u8,
) -> VmResponse {
    hotplug_manager
        .remove_hotplug_device(bus, linux, sys_allocator)
        .map_or_else(|e| VmResponse::ErrString(format!("{e:?}")), |_| VmResponse::Ok)
}
2964
2965#[cfg(target_arch = "x86_64")]
2966fn remove_hotplug_bridge<V: VmArch, Vcpu: VcpuArch>(
2967 linux: &RunnableLinuxVm<V, Vcpu>,
2968 sys_allocator: &mut SystemAllocator,
2969 buses_to_remove: &mut Vec<u8>,
2970 hotplug_key: HotPlugKey,
2971 child_bus: u8,
2972) -> Result<()> {
2973 for (bus_num, hp_bus) in linux.hotplug_bus.iter() {
2974 let mut hp_bus_lock = hp_bus.lock();
2975 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(hotplug_key) {
2976 sys_allocator.release_pci(pci_addr);
2977 hp_bus_lock.hot_unplug(pci_addr)?;
2978 buses_to_remove.push(child_bus);
2979 if hp_bus_lock.is_empty() {
2980 if let Some(hotplug_key) = hp_bus_lock.get_hotplug_key() {
2981 remove_hotplug_bridge(
2982 linux,
2983 sys_allocator,
2984 buses_to_remove,
2985 hotplug_key,
2986 *bus_num,
2987 )?;
2988 }
2989 }
2990 return Ok(());
2991 }
2992 }
2993
2994 Err(anyhow!(
2995 "Can not find device {:?} on hotplug buses",
2996 hotplug_key
2997 ))
2998}
2999
/// Hot-removes the host PCI device described by `device` from the running VM.
///
/// The hotplug key is derived from `device.path` and `device.device_type`, and the first bus
/// in `linux.hotplug_bus` that holds that key is selected. For the device found there:
///
/// * when `iommu_host_tube` is `Some`, a `VfioDeviceDel` command is sent over it first;
/// * `hot_unplug` is called on the bus only if the bus's own key is a `HostDownstreamPort`
///   and some other non-empty bus is keyed by a `HostDownstreamPort` with the same host bus
///   number;
/// * the PCI address is released from `sys_allocator`;
/// * otherwise (or if the bus is now empty) the bus's own bridge is removed through
///   `remove_hotplug_bridge`.
///
/// Afterwards, if the removed bridge was a `HostDownstreamPort`, empty buses keyed by a
/// `HostDownstreamPort` with the same host bus number are removed as well, and every bus
/// collected along the way is dropped from `linux.hotplug_bus`.
///
/// Returns an error if the path cannot be parsed, the IOMMU request fails or is answered
/// unexpectedly, an unplug/bridge removal fails, or no bus holds the device.
///
/// Note: `simbling` in the identifiers below is a misspelling of "sibling".
#[cfg(target_arch = "x86_64")]
fn remove_hotplug_device<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator: &mut SystemAllocator,
    iommu_host_tube: Option<&Tube>,
    device: &HotPlugDeviceInfo,
) -> Result<()> {
    let host_addr = PciAddress::from_path(&device.path)?;
    let hotplug_key = match device.device_type {
        HotPlugDeviceType::UpstreamPort => HotPlugKey::HostUpstreamPort { host_addr },
        HotPlugDeviceType::DownstreamPort => HotPlugKey::HostDownstreamPort { host_addr },
        HotPlugDeviceType::EndPoint => HotPlugKey::HostVfio { host_addr },
    };

    // Find the bus holding the device; the Arc is cloned so the map is not borrowed while
    // the bus stays locked below.
    let hp_bus = linux
        .hotplug_bus
        .iter()
        .find(|(_, hp_bus)| {
            let hp_bus = hp_bus.lock();
            hp_bus.get_hotplug_device(hotplug_key).is_some()
        })
        .map(|(bus_num, hp_bus)| (*bus_num, hp_bus.clone()));

    if let Some((bus_num, hp_bus)) = hp_bus {
        let mut buses_to_remove = Vec::new();
        let mut removed_key = None;
        // This guard stays alive until the end of the enclosing `if let` block.
        let mut hp_bus_lock = hp_bus.lock();
        if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(hotplug_key) {
            if let Some(iommu_host_tube) = iommu_host_tube {
                let request =
                    VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceDel {
                        endpoint_addr: pci_addr.to_u32(),
                    });
                // The underlying request error is discarded and reported as SocketFailed.
                match virtio_iommu_request(iommu_host_tube, &request)
                    .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
                {
                    VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
                    resp => bail!("Unexpected message response: {:?}", resp),
                }
            }
            // Stays true unless this bus hangs off a host downstream port and another bus
            // keyed by a downstream port with the same host bus number still has devices.
            let mut empty_simbling = true;
            if let Some(HotPlugKey::HostDownstreamPort { host_addr }) =
                hp_bus_lock.get_hotplug_key()
            {
                let addr_alias = host_addr;
                for (simbling_bus_num, hp_bus) in linux.hotplug_bus.iter() {
                    // Skip the bus that is already locked by `hp_bus_lock` above.
                    if *simbling_bus_num != bus_num {
                        let hp_bus_lock = hp_bus.lock();
                        let hotplug_key = hp_bus_lock.get_hotplug_key();
                        if let Some(HotPlugKey::HostDownstreamPort { host_addr }) = hotplug_key {
                            if addr_alias.bus == host_addr.bus && !hp_bus_lock.is_empty() {
                                empty_simbling = false;
                                break;
                            }
                        }
                    }
                }
            }

            // Only unplug the single device here when a sibling is still populated; in the
            // other case the bridge itself is removed just below.
            if !empty_simbling {
                hp_bus_lock.hot_unplug(pci_addr)?;
            }

            sys_allocator.release_pci(pci_addr);
            if empty_simbling || hp_bus_lock.is_empty() {
                if let Some(hotplug_key) = hp_bus_lock.get_hotplug_key() {
                    removed_key = Some(hotplug_key);
                    remove_hotplug_bridge(
                        linux,
                        sys_allocator,
                        &mut buses_to_remove,
                        hotplug_key,
                        bus_num,
                    )?;
                }
            }
        }

        // After removing a downstream-port bridge, also remove the bridges of empty buses
        // keyed by a downstream port with the same host bus number.
        if let Some(HotPlugKey::HostDownstreamPort { host_addr }) = removed_key {
            let addr_alias = host_addr;
            for (simbling_bus_num, hp_bus) in linux.hotplug_bus.iter() {
                if *simbling_bus_num != bus_num {
                    let hp_bus_lock = hp_bus.lock();
                    let hotplug_key = hp_bus_lock.get_hotplug_key();
                    if let Some(HotPlugKey::HostDownstreamPort { host_addr }) = hotplug_key {
                        if addr_alias.bus == host_addr.bus && hp_bus_lock.is_empty() {
                            remove_hotplug_bridge(
                                linux,
                                sys_allocator,
                                &mut buses_to_remove,
                                // Cannot be None: the `if let` above matched `Some(..)`.
                                hotplug_key.unwrap(),
                                *simbling_bus_num,
                            )?;
                        }
                    }
                }
            }
        }
        // Drop the map entries only after all iteration over the map is finished.
        for bus in buses_to_remove.iter() {
            linux.hotplug_bus.remove(bus);
        }
        return Ok(());
    }

    Err(anyhow!(
        "Can not find device {:?} on hotplug buses",
        hotplug_key
    ))
}
3116
3117pub fn trigger_vm_suspend_and_wait_for_entry(
3118 guest_suspended_cvar: Arc<(Mutex<bool>, Condvar)>,
3119 tube: &SendTube,
3120 response: vm_control::VmResponse,
3121 suspend_tube: Arc<Mutex<SendTube>>,
3122 pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
3123) {
3124 let (lock, cvar) = &*guest_suspended_cvar;
3125 let mut guest_suspended = lock.lock();
3126
3127 *guest_suspended = false;
3128
3129 if let Some(pm) = pm {
3132 pm.lock().slpbtn_evt();
3133 } else {
3134 error!("generating sleepbtn during suspend not supported");
3135 }
3136
3137 let result = cvar.wait_timeout(guest_suspended, std::time::Duration::from_secs(15));
3140 guest_suspended = result.0;
3141
3142 if result.1.timed_out() {
3143 warn!("Guest suspension timeout - proceeding anyway");
3144 } else if *guest_suspended {
3145 info!("Guest suspended");
3146 }
3147
3148 if let Err(e) = suspend_tube.lock().send(&true) {
3149 error!("failed to trigger suspend event: {}", e);
3150 }
3151 if let Err(e) = tube.send(&response) {
3153 error!("failed to send VmResponse: {}", e);
3154 }
3155}
3156
/// Follow-up action that `send_pvclock_cmd` may hand back to its caller after a pvclock
/// command completed.
///
/// The enum has no variants on targets other than aarch64.
#[cfg(feature = "pvclock")]
#[derive(Debug)]
enum PvClockAction {
    /// Carries the `total_suspended_ticks` value reported in a `Resumed` response.
    /// NOTE(review): presumably the caller applies this as a guest counter offset — confirm
    /// at the call site.
    #[cfg(target_arch = "aarch64")]
    SetCounterOffset(u64),
}
3165
/// Sends `command` to the pvclock device over `tube` and interprets the reply.
///
/// Returns:
/// * `Err` if sending or receiving fails, or the device answers with
///   `PvClockCommandResponse::Err`;
/// * `Ok(Some(PvClockAction::SetCounterOffset(..)))` for a `Resumed` reply on aarch64;
/// * `Ok(None)` for every other reply (an inactive device is only logged as a warning).
#[cfg(feature = "pvclock")]
fn send_pvclock_cmd(tube: &Tube, command: PvClockCommand) -> Result<Option<PvClockAction>> {
    tube.send(&command)
        .with_context(|| format!("failed to send pvclock command {command:?}"))?;
    let resp: PvClockCommandResponse = tube
        .recv()
        .context("failed to receive pvclock command response")?;

    let action = match resp {
        PvClockCommandResponse::Err(e) => {
            bail!("pvclock encountered error on {:?}: {}", command, e);
        }
        PvClockCommandResponse::DeviceInactive => {
            warn!("Tried to send {command:?} but pvclock device was inactive");
            None
        }
        PvClockCommandResponse::Resumed {
            total_suspended_ticks,
        } => {
            info!("{command:?} completed with {total_suspended_ticks} total_suspended_ticks");
            // Only aarch64 has a follow-up action for a resume.
            #[cfg(target_arch = "aarch64")]
            {
                Some(PvClockAction::SetCounterOffset(total_suspended_ticks))
            }
            #[cfg(not(target_arch = "aarch64"))]
            {
                None
            }
        }
        PvClockCommandResponse::Ok => {
            info!("{command:?} completed with {resp:?}");
            None
        }
    };
    Ok(action)
}
3201
3202#[cfg(target_arch = "x86_64")]
3203fn handle_hotplug_command<V: VmArch, Vcpu: VcpuArch>(
3204 linux: &mut RunnableLinuxVm<V, Vcpu>,
3205 sys_allocator: &mut SystemAllocator,
3206 cfg: &Config,
3207 add_control_tube: &mut impl FnMut(AnyControlTube),
3208 hp_control_tube: &mpsc::Sender<PciRootCommand>,
3209 iommu_host_tube: Option<&Tube>,
3210 device: &HotPlugDeviceInfo,
3211 add: bool,
3212 #[cfg(feature = "swap")] swap_controller: &mut Option<SwapController>,
3213 vfio_container_manager: &mut VfioContainerManager,
3214) -> VmResponse {
3215 let iommu_host_tube = if cfg.vfio_isolate_hotplug {
3216 iommu_host_tube
3217 } else {
3218 None
3219 };
3220
3221 let ret = if add {
3222 add_hotplug_device(
3223 linux,
3224 sys_allocator,
3225 cfg,
3226 add_control_tube,
3227 hp_control_tube,
3228 iommu_host_tube,
3229 device,
3230 #[cfg(feature = "swap")]
3231 swap_controller,
3232 vfio_container_manager,
3233 )
3234 } else {
3235 remove_hotplug_device(linux, sys_allocator, iommu_host_tube, device)
3236 };
3237
3238 match ret {
3239 Ok(()) => VmResponse::Ok,
3240 Err(e) => {
3241 error!("handle_hotplug_command failure: {}", e);
3242 VmResponse::Err(base::Error::new(libc::EINVAL))
3243 }
3244 }
3245}
3246
/// Bundle of borrowed state that `process_vm_request` receives as `state`.
///
/// Every field except `pvclock_host_tube` is a reference (or an `Option` of one) with the
/// struct's lifetime `'a`; the struct owns nothing but that one `Option<Arc<Tube>>`.
struct ControlLoopState<'a, V: VmArch, Vcpu: VcpuArch> {
    /// The running VM; passed on to the hotplug handlers.
    linux: &'a mut RunnableLinuxVm<V, Vcpu>,
    /// VM configuration.
    cfg: &'a Config,
    /// Shared system allocator; locked for the duration of a hotplug call.
    sys_allocator: &'a Arc<Mutex<SystemAllocator>>,
    /// Tagged control tubes keyed by a `usize`.
    /// NOTE(review): presumably the key is the same id passed to `process_vm_request` — confirm.
    control_tubes: &'a BTreeMap<usize, TaggedControlTube>,
    disk_host_tubes: &'a [Tube],
    #[cfg(feature = "audio")]
    snd_host_tubes: &'a [Tube],
    #[cfg(feature = "gpu")]
    gpu_control_tube: Option<&'a Tube>,
    #[cfg(feature = "usb")]
    usb_control_tube: &'a Tube,
    /// Tube to the IOMMU device, if any; locked and lent to `handle_hotplug_command`.
    #[cfg(target_arch = "x86_64")]
    iommu_host_tube: &'a Option<Arc<Mutex<Tube>>>,
    /// Sender for `PciRootCommand`s; forwarded to `handle_hotplug_command`.
    #[cfg(target_arch = "x86_64")]
    hp_control_tube: &'a mpsc::Sender<PciRootCommand>,
    /// Flag/condvar pair of the kind consumed by `trigger_vm_suspend_and_wait_for_entry`.
    guest_suspended_cvar: &'a Option<Arc<(Mutex<bool>, Condvar)>>,
    /// PCI hotplug manager; `None` makes net hotplug requests fail with
    /// "PCI hotplug is not enabled.".
    #[cfg(feature = "pci-hotplug")]
    hotplug_manager: &'a mut Option<PciHotPlugManager>,
    #[cfg(feature = "swap")]
    swap_controller: &'a mut Option<SwapController>,
    /// One join handle plus `VcpuControl` sender per entry.
    vcpu_handles: &'a [(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
    #[cfg(feature = "balloon")]
    balloon_tube: Option<&'a mut BalloonTube>,
    device_ctrl_tube: &'a Tube,
    irq_handler_control: &'a Tube,
    #[cfg(any(target_arch = "x86_64", feature = "pci-hotplug"))]
    vm_memory_handler_control: &'a Tube,
    /// Registered listener tubes, grouped by the event they are registered for.
    #[cfg(feature = "registered_events")]
    registered_evt_tubes: &'a mut HashMap<RegisteredEvent, HashSet<AddressedProtoTube>>,
    #[cfg(feature = "pvclock")]
    pvclock_host_tube: Option<Arc<Tube>>,
    vfio_container_manager: &'a mut VfioContainerManager,
    suspended_pvclock_state: &'a mut Option<hypervisor::ClockState>,
    /// Per-vCPU pair of `u32`s keyed by a `usize`.
    /// NOTE(review): going by the name these are (pid, tid) keyed by vCPU index — confirm.
    vcpus_pid_tid: &'a BTreeMap<usize, (u32, u32)>,
    vm_memory_control_client: &'a VmMemoryClient,
}
3284
3285struct VmRequestResult {
3286 response: Option<VmResponse>,
3287 exit: bool,
3288}
3289
3290impl VmRequestResult {
3291 fn new(response: Option<VmResponse>, exit: bool) -> Self {
3292 VmRequestResult { response, exit }
3293 }
3294}
3295
3296fn process_vm_request<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
3297 state: &mut ControlLoopState<V, Vcpu>,
3298 id: usize,
3299 tube: &Tube,
3300 request: VmRequest,
3301 #[cfg_attr(
3302 not(any(target_arch = "x86_64", feature = "pci-hotplug")),
3303 allow(unused_variables, clippy::ptr_arg)
3304 )]
3305 add_tubes: &mut Vec<TaggedControlTube>,
3306) -> Result<VmRequestResult> {
3307 #[cfg(any(target_arch = "x86_64", feature = "pci-hotplug"))]
3308 let mut add_irq_control_tubes = Vec::new();
3309 #[cfg(any(target_arch = "x86_64", feature = "pci-hotplug"))]
3310 let mut add_vm_memory_control_tubes = Vec::new();
3311
3312 #[cfg(any(target_arch = "x86_64", feature = "pci-hotplug"))]
3313 let mut add_control_tube = |t| match t {
3314 AnyControlTube::DeviceControlTube(_) => {
3315 panic!("hotplugging DeviceControlTube not supported yet")
3316 }
3317 AnyControlTube::IrqTube(t) => add_irq_control_tubes.push(t),
3318 AnyControlTube::TaggedControlTube(t) => add_tubes.push(t),
3319 AnyControlTube::VmMemoryTube(t) => add_vm_memory_control_tubes.push(t),
3320 };
3321
3322 let response = match request {
3323 VmRequest::Exit => {
3324 return Ok(VmRequestResult::new(Some(VmResponse::Ok), true));
3325 }
3326 VmRequest::HotPlugVfioCommand { device, add } => {
3327 #[cfg(target_arch = "x86_64")]
3328 {
3329 handle_hotplug_command(
3330 state.linux,
3331 &mut state.sys_allocator.lock(),
3332 state.cfg,
3333 &mut add_control_tube,
3334 state.hp_control_tube,
3335 state.iommu_host_tube.as_ref().map(|t| t.lock()).as_deref(),
3336 &device,
3337 add,
3338 #[cfg(feature = "swap")]
3339 state.swap_controller,
3340 state.vfio_container_manager,
3341 )
3342 }
3343
3344 #[cfg(not(target_arch = "x86_64"))]
3345 {
3346 let _ = (device, add);
3348 let _ = &state.vfio_container_manager;
3349 VmResponse::Ok
3350 }
3351 }
3352 #[cfg(feature = "pci-hotplug")]
3353 VmRequest::HotPlugNetCommand(net_cmd) => {
3354 if let Some(hotplug_manager) = state.hotplug_manager.as_mut() {
3355 handle_hotplug_net_command(
3356 net_cmd,
3357 state.linux,
3358 &mut state.sys_allocator.lock(),
3359 &mut add_control_tube,
3360 hotplug_manager,
3361 )
3362 } else {
3363 VmResponse::ErrString("PCI hotplug is not enabled.".to_owned())
3364 }
3365 }
3366 #[cfg(feature = "registered_events")]
3367 VmRequest::RegisterListener { socket_addr, event } => {
3368 let (registered_tube, already_registered) =
3369 find_registered_tube(state.registered_evt_tubes, &socket_addr, event);
3370
3371 if !already_registered {
3372 let addr_tube = make_addr_tube_from_maybe_existing(registered_tube, socket_addr)?;
3373
3374 if let Some(tubes) = state.registered_evt_tubes.get_mut(&event) {
3375 tubes.insert(addr_tube);
3376 } else {
3377 state
3378 .registered_evt_tubes
3379 .insert(event, vec![addr_tube].into_iter().collect());
3380 }
3381 }
3382 VmResponse::Ok
3383 }
3384 #[cfg(feature = "registered_events")]
3385 VmRequest::UnregisterListener { socket_addr, event } => {
3386 if let Some(tubes) = state.registered_evt_tubes.get_mut(&event) {
3387 tubes.retain(|t| t.socket_addr != socket_addr);
3388 }
3389 state
3390 .registered_evt_tubes
3391 .retain(|_, tubes| !tubes.is_empty());
3392 VmResponse::Ok
3393 }
3394 #[cfg(feature = "registered_events")]
3395 VmRequest::Unregister { socket_addr } => {
3396 for (_, tubes) in state.registered_evt_tubes.iter_mut() {
3397 tubes.retain(|t| t.socket_addr != socket_addr);
3398 }
3399 state
3400 .registered_evt_tubes
3401 .retain(|_, tubes| !tubes.is_empty());
3402 VmResponse::Ok
3403 }
3404 #[cfg(feature = "balloon")]
3405 VmRequest::BalloonCommand(cmd) => {
3406 if let Some(tube) = state.balloon_tube.as_mut() {
3407 let Some((r, key)) = tube.send_cmd(cmd, Some(id)) else {
3408 return Ok(VmRequestResult::new(None, false));
3409 };
3410 if key != id {
3411 let Some(TaggedControlTube::Vm(tube)) = state.control_tubes.get(&key) else {
3412 return Ok(VmRequestResult::new(None, false));
3413 };
3414 if let Err(e) = tube.send(&r) {
3415 error!("failed to send VmResponse: {}", e);
3416 }
3417 return Ok(VmRequestResult::new(None, false));
3418 }
3419 r
3420 } else {
3421 VmResponse::Err(base::Error::new(libc::ENOTSUP))
3422 }
3423 }
3424 VmRequest::VcpuPidTid => VmResponse::VcpuPidTidResponse {
3425 pid_tid_map: state.vcpus_pid_tid.clone(),
3426 },
3427 VmRequest::Throttle(vcpu, cycles) => {
3428 vcpu::kick_vcpu(
3429 &state.vcpu_handles.get(vcpu),
3430 state.linux.irq_chip.as_irq_chip(),
3431 VcpuControl::Throttle(cycles),
3432 );
3433 return Ok(VmRequestResult::new(None, false));
3434 }
3435 VmRequest::RegisterMemory {
3436 fd,
3437 offset,
3438 range_start,
3439 range_end,
3440 cache_coherent,
3441 } => {
3442 if range_start >= range_end {
3443 error!("range_start >= range_end");
3444 return Ok(VmRequestResult::new(
3445 Some(VmResponse::Err(base::Error::new(libc::EINVAL))),
3446 false,
3447 ));
3448 }
3449 let source = VmMemorySource::Descriptor {
3450 descriptor: fd,
3451 offset,
3452 size: range_end - range_start,
3453 };
3454 let dest = VmMemoryDestination::GuestPhysicalAddress(range_start);
3455 let cache_type = if cache_coherent {
3456 MemCacheType::CacheCoherent
3457 } else {
3458 MemCacheType::CacheNonCoherent
3459 };
3460 match state.vm_memory_control_client.register_memory(
3461 source,
3462 dest,
3463 Protection::read_write(),
3464 cache_type,
3465 ) {
3466 Ok(region_id) => VmResponse::RegisterMemory2 {
3467 region_id: region_id.0 .0,
3468 },
3469 Err(e) => VmResponse::ErrString(format!("register memory failed: {e:?}")),
3470 }
3471 }
3472 VmRequest::UnregisterMemory { region_id } => {
3473 let mem_region_id = VmMemoryRegionId(GuestAddress(region_id));
3474 match state
3475 .vm_memory_control_client
3476 .unregister_memory(mem_region_id)
3477 {
3478 Ok(_) => VmResponse::Ok,
3479 Err(e) => VmResponse::ErrString(format!("unregister memory failed: {e:?}")),
3480 }
3481 }
3482 _ => {
3483 if !state.cfg.force_s2idle {
3484 #[cfg(feature = "pvclock")]
3485 if let Some(ref pvclock_host_tube) = state.pvclock_host_tube {
3486 if let VmRequest::ResumeVcpus = request {
3488 let cmd = PvClockCommand::Resume;
3489 match send_pvclock_cmd(pvclock_host_tube, cmd.clone()) {
3490 Ok(action) => {
3491 info!("{:?} command successfully processed", cmd);
3492 if let Some(action) = action {
3493 match action {
3494 #[cfg(target_arch = "aarch64")]
3495 PvClockAction::SetCounterOffset(offset) => {
3496 state.linux.vm.set_counter_offset(offset)?;
3497 }
3498 }
3499 }
3500 }
3501 Err(e) => error!("{:?} command failed: {:#}", cmd, e),
3502 };
3503 }
3504 }
3505 }
3506 let kick_all_vcpus = |msg| {
3507 if let VcpuControl::RunState(VmRunMode::Running) = msg {
3508 for dev in &state.linux.resume_notify_devices {
3509 dev.lock().resume_imminent();
3510 }
3511 }
3512 vcpu::kick_all_vcpus(state.vcpu_handles, state.linux.irq_chip.as_irq_chip(), msg);
3513 };
3514 let response = request.execute(
3515 &state.linux.vm,
3516 state.disk_host_tubes,
3517 #[cfg(feature = "audio")]
3518 state.snd_host_tubes,
3519 #[cfg(not(feature = "audio"))]
3520 &[],
3521 &mut state.linux.pm,
3522 #[cfg(feature = "gpu")]
3523 state.gpu_control_tube,
3524 #[cfg(not(feature = "gpu"))]
3525 None,
3526 #[cfg(feature = "usb")]
3527 Some(state.usb_control_tube),
3528 #[cfg(not(feature = "usb"))]
3529 None,
3530 &mut state.linux.bat_control,
3531 kick_all_vcpus,
3532 |index, msg| {
3533 vcpu::kick_vcpu(
3534 &state.vcpu_handles.get(index),
3535 state.linux.irq_chip.as_irq_chip(),
3536 msg,
3537 )
3538 },
3539 state.cfg.force_s2idle,
3540 #[cfg(feature = "swap")]
3541 state.swap_controller.as_ref(),
3542 state.device_ctrl_tube,
3543 state.vcpu_handles.len(),
3544 state.irq_handler_control,
3545 || state.linux.irq_chip.snapshot(state.linux.vcpu_count),
3546 state.suspended_pvclock_state,
3547 );
3548 if state.cfg.force_s2idle {
3549 if let VmRequest::SuspendVcpus = request {
3550 let send_tube = tube.try_clone_send_tube().unwrap();
3552 let suspend_tube = state.linux.suspend_tube.0.clone();
3553 let guest_suspended_cvar = state.guest_suspended_cvar.clone();
3554 let pm = state.linux.pm.clone();
3555
3556 std::thread::Builder::new()
3557 .name("s2idle_wait".to_owned())
3558 .spawn(move || {
3559 trigger_vm_suspend_and_wait_for_entry(
3560 guest_suspended_cvar.unwrap(),
3561 &send_tube,
3562 response,
3563 suspend_tube,
3564 pm,
3565 )
3566 })
3567 .context("failed to spawn s2idle_wait thread")?;
3568
3569 return Ok(VmRequestResult::new(None, false));
3572 }
3573 } else {
3574 #[cfg(feature = "pvclock")]
3575 if let Some(ref pvclock_host_tube) = state.pvclock_host_tube {
3576 if let VmRequest::SuspendVcpus = request {
3578 let cmd = PvClockCommand::Suspend;
3579 match send_pvclock_cmd(pvclock_host_tube, cmd.clone()) {
3580 Ok(action) => {
3581 info!("{:?} command successfully processed", cmd);
3582 if let Some(action) = action {
3583 error!("Unexpected action {:?} requested for suspend", action);
3584 }
3585 }
3586 Err(e) => error!("{:?} command failed: {:#}", cmd, e),
3587 };
3588 }
3589 }
3590 }
3591 response
3592 }
3593 };
3594
3595 cfg_if::cfg_if! {
3596 if #[cfg(any(target_arch = "x86_64", feature = "pci-hotplug"))] {
3597 if !add_irq_control_tubes.is_empty() {
3598 state
3599 .irq_handler_control
3600 .send(&IrqHandlerRequest::AddIrqControlTubes(
3601 add_irq_control_tubes,
3602 ))?;
3603 }
3604 if !add_vm_memory_control_tubes.is_empty() {
3605 state
3606 .vm_memory_handler_control
3607 .send(&VmMemoryHandlerRequest::AddControlTubes(
3608 add_vm_memory_control_tubes,
3609 ))?;
3610 }
3611 }
3612 }
3613
3614 Ok(VmRequestResult::new(Some(response), false))
3615}
3616
3617fn process_vm_control_event<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
3618 state: &mut ControlLoopState<V, Vcpu>,
3619 id: usize,
3620 socket: &TaggedControlTube,
3621) -> Result<(bool, Vec<usize>, Vec<TaggedControlTube>)> {
3622 let mut vm_control_ids_to_remove = Vec::new();
3623 let mut add_tubes = Vec::new();
3624 match socket {
3625 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
3626 Ok(request) => {
3627 let res = process_vm_request(state, id, tube, request, &mut add_tubes)?;
3628
3629 if let Some(response) = res.response {
3630 if let Err(e) = tube.send(&response) {
3631 error!("failed to send VmResponse: {}", e);
3632 }
3633 }
3634
3635 if res.exit {
3636 return Ok((true, Vec::new(), Vec::new()));
3637 }
3638 }
3639 Err(e) => {
3640 if let TubeError::Disconnected = e {
3641 vm_control_ids_to_remove.push(id);
3642 } else {
3643 error!("failed to recv VmRequest: {}", e);
3644 }
3645 }
3646 },
3647 TaggedControlTube::VmMsync(tube) => match tube.recv::<VmMemoryMappingRequest>() {
3648 Ok(request) => {
3649 let response = request.execute(&mut state.linux.vm);
3650 if let Err(e) = tube.send(&response) {
3651 error!("failed to send VmMsyncResponse: {}", e);
3652 }
3653 }
3654 Err(e) => {
3655 if let TubeError::Disconnected = e {
3656 vm_control_ids_to_remove.push(id);
3657 } else {
3658 error!("failed to recv VmMsyncRequest: {}", e);
3659 }
3660 }
3661 },
3662 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
3663 Ok(request) => {
3664 let response =
3665 request.execute(&mut state.linux.vm, &mut state.sys_allocator.lock());
3666 if let Err(e) = tube.send(&response) {
3667 error!("failed to send VmResponse: {}", e);
3668 }
3669 }
3670 Err(e) => {
3671 if let TubeError::Disconnected = e {
3672 vm_control_ids_to_remove.push(id);
3673 } else {
3674 error!("failed to recv VmResponse: {}", e);
3675 }
3676 }
3677 },
3678 }
3679
3680 Ok((false, vm_control_ids_to_remove, add_tubes))
3681}
3682
/// A shared `ProtoTube` together with the socket address it is associated with.
///
/// Equality and hashing consider only `socket_addr`, so a `HashSet` of these holds at most one
/// entry per address regardless of which tube instance backs it.
#[cfg(feature = "registered_events")]
struct AddressedProtoTube {
    tube: Rc<ProtoTube>,
    socket_addr: String,
}

/// Two addressed tubes are equal when their socket addresses match.
#[cfg(feature = "registered_events")]
impl PartialEq for AddressedProtoTube {
    fn eq(&self, rhs: &Self) -> bool {
        self.socket_addr.as_str() == rhs.socket_addr.as_str()
    }
}

#[cfg(feature = "registered_events")]
impl Eq for AddressedProtoTube {}

/// Hashes only the socket address, in agreement with the `PartialEq` impl.
#[cfg(feature = "registered_events")]
impl Hash for AddressedProtoTube {
    fn hash<H>(&self, hasher: &mut H)
    where
        H: std::hash::Hasher,
    {
        Hash::hash(&self.socket_addr, hasher);
    }
}

#[cfg(feature = "registered_events")]
impl AddressedProtoTube {
    /// Sends the protobuf message `msg` over the underlying tube.
    pub fn send<M>(&self, msg: &M) -> Result<(), base::TubeError>
    where
        M: protobuf::Message,
    {
        self.tube.send_proto(msg)
    }
}
3712
/// Looks up existing registrations for `socket_addr`.
///
/// Returns `(tube, already_registered)`:
/// * `already_registered` is `true` when `socket_addr` is registered for `event`.
/// * `tube` is a tube registered for `socket_addr` under a different event, if one was seen
///   during the scan. The scan stops at the first match for `event`, so when
///   `already_registered` is `true` this only reflects entries visited before that point.
#[cfg(feature = "registered_events")]
fn find_registered_tube<'a>(
    registered_tubes: &'a HashMap<RegisteredEvent, HashSet<AddressedProtoTube>>,
    socket_addr: &str,
    event: RegisteredEvent,
) -> (Option<&'a Rc<ProtoTube>>, bool) {
    let mut existing: Option<&'a Rc<ProtoTube>> = None;
    for (evt, addr_tubes) in registered_tubes {
        let is_requested_event = *evt == event;
        for hit in addr_tubes
            .iter()
            .filter(|candidate| candidate.socket_addr == socket_addr)
        {
            if is_requested_event {
                return (existing, true);
            }
            // Keep scanning after remembering this tube: a registration for the requested
            // event may still show up in a later entry.
            existing = Some(&hit.tube);
        }
    }
    (existing, false)
}
3739
/// Builds an `AddressedProtoTube` for `addr`.
///
/// When `tube` is `Some`, the existing shared tube is reused. Otherwise a new connection to the
/// socket at `addr` is opened and wrapped in a fresh `ProtoTube`.
///
/// # Errors
///
/// Fails if connecting to `addr` fails or the connected socket cannot be converted into a
/// `Tube`.
#[cfg(feature = "registered_events")]
fn make_addr_tube_from_maybe_existing(
    tube: Option<&Rc<ProtoTube>>,
    addr: String,
) -> Result<AddressedProtoTube> {
    let shared_tube = match tube {
        Some(registered_tube) => Rc::clone(registered_tube),
        None => {
            let sock = UnixSeqpacket::connect(addr.clone()).with_context(|| {
                format!("failed to connect to registered listening socket {addr}")
            })?;
            Rc::new(ProtoTube::from(Tube::try_from(sock)?))
        }
    };
    Ok(AddressedProtoTube {
        tube: shared_tube,
        socket_addr: addr,
    })
}
3760
3761fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
3762 mut linux: RunnableLinuxVm<V, Vcpu>,
3763 sys_allocator: SystemAllocator,
3764 cfg: Config,
3765 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
3766 all_control_tubes: Vec<AnyControlTube>,
3767 #[cfg(feature = "usb")] usb_control_tube: Tube,
3768 vm_evt_rdtube: RecvTube,
3769 vm_evt_wrtube: SendTube,
3770 sigchld_fd: SignalFd,
3771 gralloc: RutabagaGralloc,
3772 vcpu_ids: Vec<usize>,
3773 iommu_host_tube: Option<Tube>,
3774 #[cfg(target_arch = "x86_64")] hp_control_tube: mpsc::Sender<PciRootCommand>,
3775 #[cfg(target_arch = "x86_64")] hp_thread: std::thread::JoinHandle<()>,
3776 #[cfg(feature = "pci-hotplug")] mut hotplug_manager: Option<PciHotPlugManager>,
3777 #[allow(unused_mut)] #[cfg(feature = "swap")]
3779 mut swap_controller: Option<SwapController>,
3780 #[cfg(feature = "registered_events")] reg_evt_rdtube: RecvTube,
3781 guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
3782 metrics_tube: RecvTube,
3783 mut vfio_container_manager: VfioContainerManager,
3784 mut worker_process_pids: BTreeSet<Pid>,
3786 #[cfg(target_arch = "aarch64")] vcpu_domain_paths: BTreeMap<usize, PathBuf>,
3787) -> Result<ExitState> {
3788 #[cfg(feature = "balloon")]
3790 let mut balloon_host_tube = None;
3791 let mut disk_host_tubes = Vec::new();
3792 #[cfg(feature = "gpu")]
3793 let mut gpu_control_tube = None;
3794 #[cfg(feature = "pvclock")]
3795 let mut pvclock_host_tube = None;
3796 #[cfg(feature = "audio")]
3797 let mut snd_host_tubes = Vec::new();
3798 let mut irq_control_tubes = Vec::new();
3799 let mut vm_memory_control_tubes = Vec::new();
3800 let mut control_tubes = Vec::new();
3801 for t in all_control_tubes {
3802 match t {
3803 #[cfg(feature = "balloon")]
3804 AnyControlTube::DeviceControlTube(DeviceControlTube::Balloon(t)) => {
3805 assert!(balloon_host_tube.is_none());
3806 balloon_host_tube = Some(t)
3807 }
3808 AnyControlTube::DeviceControlTube(DeviceControlTube::Disk(t)) => {
3809 disk_host_tubes.push(t)
3810 }
3811 #[cfg(feature = "gpu")]
3812 AnyControlTube::DeviceControlTube(DeviceControlTube::Gpu(t)) => {
3813 assert!(gpu_control_tube.is_none());
3814 gpu_control_tube = Some(t)
3815 }
3816 #[cfg(feature = "pvclock")]
3817 AnyControlTube::DeviceControlTube(DeviceControlTube::PvClock(t)) => {
3818 assert!(pvclock_host_tube.is_none());
3819 pvclock_host_tube = Some(Arc::new(t))
3820 }
3821 #[cfg(feature = "audio")]
3822 AnyControlTube::DeviceControlTube(DeviceControlTube::Snd(t)) => {
3823 snd_host_tubes.push(t);
3824 }
3825 AnyControlTube::IrqTube(t) => irq_control_tubes.push(t),
3826 AnyControlTube::TaggedControlTube(t) => control_tubes.push(t),
3827 AnyControlTube::VmMemoryTube(t) => vm_memory_control_tubes.push(t),
3828 }
3829 }
3830
3831 #[cfg(feature = "gdb")]
3832 let (to_gdb_channel, gdb) = if let Some(port) = cfg.gdb {
3833 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
3835 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
3836 let (to_gdb_channel, from_vcpu_channel) = mpsc::channel();
3838 (
3839 Some(to_gdb_channel),
3840 Some((port, gdb_control_tube, from_vcpu_channel)),
3841 )
3842 } else {
3843 (None, None)
3844 };
3845
3846 #[derive(EventToken)]
3847 enum Token {
3848 VmEvent,
3849 Suspend,
3850 ChildSignal,
3851 VmControlServer,
3852 VmControl {
3853 id: usize,
3854 },
3855 #[cfg(feature = "registered_events")]
3856 RegisteredEvent,
3857 #[cfg(feature = "balloon")]
3858 BalloonTube,
3859 }
3860 stdin()
3861 .set_raw_mode()
3862 .expect("failed to set terminal raw mode");
3863
3864 let sys_allocator_mutex = Arc::new(Mutex::new(sys_allocator));
3865 let iommu_host_tube = iommu_host_tube.map(|t| Arc::new(Mutex::new(t)));
3866
3867 let wait_ctx = WaitContext::build_with(&[
3868 (&linux.suspend_tube.1, Token::Suspend),
3869 (&sigchld_fd, Token::ChildSignal),
3870 (&vm_evt_rdtube, Token::VmEvent),
3871 #[cfg(feature = "registered_events")]
3872 (®_evt_rdtube, Token::RegisteredEvent),
3873 ])
3874 .context("failed to build wait context")?;
3875
3876 if let Some(socket_server) = &control_server_socket {
3877 wait_ctx
3878 .add(socket_server, Token::VmControlServer)
3879 .context("failed to add descriptor to wait context")?;
3880 }
3881 let mut control_tubes = BTreeMap::from_iter(control_tubes.into_iter().enumerate());
3882 let mut next_control_id = control_tubes.len();
3883 for (id, socket) in control_tubes.iter() {
3884 wait_ctx
3885 .add(socket.as_ref(), Token::VmControl { id: *id })
3886 .context("failed to add descriptor to wait context")?;
3887 }
3888
3889 #[cfg(feature = "balloon")]
3890 let mut balloon_tube = balloon_host_tube
3891 .map(|tube| -> Result<BalloonTube> {
3892 wait_ctx
3893 .add(&tube, Token::BalloonTube)
3894 .context("failed to add descriptor to wait context")?;
3895 Ok(BalloonTube::new(tube))
3896 })
3897 .transpose()
3898 .context("failed to create balloon tube")?;
3899
3900 if cfg.jail_config.is_some() {
3901 drop_capabilities().context("failed to drop process capabilities")?;
3903 }
3904
3905 let (device_ctrl_tube, device_ctrl_resp) = Tube::pair().context("failed to create tube")?;
3906 linux.devices_thread = match create_devices_worker_thread(
3908 linux.io_bus.clone(),
3909 linux.mmio_bus.clone(),
3910 device_ctrl_resp,
3911 ) {
3912 Ok(join_handle) => Some(join_handle),
3913 Err(e) => {
3914 return Err(anyhow!("Failed to start devices thread: {}", e));
3915 }
3916 };
3917
3918 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
3919 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
3920
3921 if !linux
3922 .vm
3923 .get_hypervisor()
3924 .check_capability(HypervisorCap::ImmediateExit)
3925 {
3926 return Err(anyhow!(
3927 "missing required hypervisor capability ImmediateExit"
3928 ));
3929 }
3930
3931 vcpu::setup_vcpu_signal_handler()?;
3932
3933 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
3934 Some(vec) => vec.into_iter().map(Some).collect(),
3935 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
3936 };
3937 if cfg.core_scheduling && cfg.per_vm_core_scheduling {
3942 if let Err(e) = enable_core_scheduling() {
3943 error!("Failed to enable core scheduling: {}", e);
3944 }
3945 }
3946
3947 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
3949 None => None,
3950 Some(cgroup_path) => {
3951 match File::create(cgroup_path.join("tasks")) {
3953 Ok(file) => Some(file),
3954 Err(_) => {
3955 info!(
3956 "Unable to open tasks file in cgroup: {}, trying CgroupV2",
3957 cgroup_path.display()
3958 );
3959 None
3960 }
3961 }
3962 }
3963 };
3964
3965 let mut vcpu_cgroup_v2_files: std::collections::BTreeMap<usize, File> = BTreeMap::new();
3967 #[cfg(target_arch = "aarch64")]
3968 for (vcpu_id, vcpu_domain_path) in vcpu_domain_paths.iter() {
3969 let vcpu_cgroup_v2_file = File::create(vcpu_domain_path.join("cgroup.threads"))
3970 .with_context(|| {
3971 format!(
3972 "failed to create vcpu-cgroup-path {}",
3973 vcpu_domain_path.join("cgroup.threads").display(),
3974 )
3975 })?;
3976 vcpu_cgroup_v2_files.insert(*vcpu_id, vcpu_cgroup_v2_file);
3977 }
3978
3979 #[cfg(target_arch = "x86_64")]
3980 let bus_lock_ratelimit_ctrl: Arc<Mutex<Ratelimit>> = Arc::new(Mutex::new(Ratelimit::new()));
3981 #[cfg(target_arch = "x86_64")]
3982 if cfg.bus_lock_ratelimit > 0 {
3983 let bus_lock_ratelimit = cfg.bus_lock_ratelimit;
3984 if linux.vm.check_capability(VmCap::BusLockDetect) {
3985 info!("Hypervisor support bus lock detect");
3986 linux
3987 .vm
3988 .enable_capability(VmCap::BusLockDetect, 0)
3989 .expect("kvm: Failed to enable bus lock detection cap");
3990 info!("Hypervisor enabled bus lock detect");
3991 bus_lock_ratelimit_ctrl
3992 .lock()
3993 .ratelimit_set_speed(bus_lock_ratelimit);
3994 } else {
3995 bail!("Kvm: bus lock detection unsuported");
3996 }
3997 }
3998
3999 #[cfg(target_os = "android")]
4000 android::set_process_profiles(&cfg.task_profiles)?;
4001
4002 #[allow(unused_mut)]
4003 let mut run_mode = if cfg.suspended {
4004 device_ctrl_tube
4006 .send(&DeviceControlCommand::SleepDevices)
4007 .context("send command to devices control socket")?;
4008 match device_ctrl_tube
4009 .recv()
4010 .context("receive from devices control socket")?
4011 {
4012 VmResponse::Ok => (),
4013 resp => bail!("device sleep failed: {}", resp),
4014 }
4015 VmRunMode::Suspending
4016 } else {
4017 VmRunMode::Running
4018 };
4019 #[cfg(feature = "gdb")]
4020 if to_gdb_channel.is_some() {
4021 run_mode = VmRunMode::Breakpoint;
4023 }
4024 let (run_mode, post_restore_run_mode) = if cfg.restore_path.is_some() {
4026 (VmRunMode::Suspending, run_mode)
4027 } else {
4028 (run_mode, run_mode)
4029 };
4030
4031 assert_eq!(vcpus.len(), linux.vcpu_init.len());
4033
4034 let (vcpu_pid_tid_sender, vcpu_pid_tid_receiver) = mpsc::channel();
4035 for ((cpu_id, vcpu), vcpu_init) in vcpus.into_iter().enumerate().zip(linux.vcpu_init.drain(..))
4036 {
4037 let vcpu_cgroup_file: Option<File>;
4038 if let Some(cgroup_file) = &vcpu_cgroup_tasks_file {
4039 vcpu_cgroup_file = Some(cgroup_file.try_clone().unwrap())
4040 } else if !cfg.cpu_freq_domains.is_empty() {
4041 vcpu_cgroup_file = Some(
4042 (vcpu_cgroup_v2_files.remove(&cpu_id).unwrap())
4043 .try_clone()
4044 .unwrap(),
4045 )
4046 } else {
4047 vcpu_cgroup_file = None
4048 };
4049
4050 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
4051 let vcpu_affinity = match &linux.vcpu_affinity {
4052 Some(VcpuAffinity::Global(v)) => v.clone(),
4053 Some(VcpuAffinity::PerVcpu(m)) => m.get(&cpu_id).cloned().unwrap_or_default(),
4054 None => Default::default(),
4055 };
4056
4057 #[cfg(target_arch = "x86_64")]
4058 let vcpu_hybrid_type = if !cfg.vcpu_hybrid_type.is_empty() {
4059 Some(*cfg.vcpu_hybrid_type.get(&cpu_id).unwrap())
4060 } else {
4061 None
4062 };
4063
4064 #[cfg(target_arch = "x86_64")]
4065 let cpu_config = Some(CpuConfigX86_64::new(
4066 cfg.force_calibrated_tsc_leaf,
4067 cfg.host_cpu_topology,
4068 cfg.enable_hwp,
4069 cfg.no_smt,
4070 cfg.itmt,
4071 vcpu_hybrid_type,
4072 ));
4073 #[cfg(target_arch = "x86_64")]
4074 let bus_lock_ratelimit_ctrl = Arc::clone(&bus_lock_ratelimit_ctrl);
4075
4076 #[cfg(target_arch = "aarch64")]
4077 let cpu_config = None;
4078
4079 #[cfg(target_arch = "riscv64")]
4080 let cpu_config = Some(CpuConfigRiscv64::new(vcpu_init.fdt_address));
4081
4082 let handle = vcpu::run_vcpu(
4083 cpu_id,
4084 vcpu_ids[cpu_id],
4085 vcpu,
4086 vcpu_init,
4087 linux.vm.try_clone().context("failed to clone vm")?,
4088 linux
4089 .irq_chip
4090 .try_box_clone()
4091 .context("failed to clone irqchip")?,
4092 linux.vcpu_count,
4093 linux.rt_cpus.contains(&cpu_id),
4094 vcpu_affinity,
4095 linux.delay_rt,
4096 vcpu_thread_barrier.clone(),
4097 (*linux.io_bus).clone(),
4098 (*linux.mmio_bus).clone(),
4099 (*linux.hypercall_bus).clone(),
4100 vm_evt_wrtube
4101 .try_clone()
4102 .context("failed to clone vm event tube")?,
4103 from_main_channel,
4104 #[cfg(feature = "gdb")]
4105 to_gdb_channel.clone(),
4106 cfg.core_scheduling,
4107 cfg.per_vm_core_scheduling,
4108 cpu_config,
4109 match vcpu_cgroup_file {
4110 None => None,
4111 Some(ref f) => Some(
4112 f.try_clone()
4113 .context("failed to clone vcpu cgroup tasks file")?,
4114 ),
4115 },
4116 #[cfg(target_arch = "x86_64")]
4117 bus_lock_ratelimit_ctrl,
4118 run_mode,
4119 cfg.boost_uclamp,
4120 vcpu_pid_tid_sender.clone(),
4121 )?;
4122 vcpu_handles.push((handle, to_vcpu_channel));
4123 }
4124
4125 let mut vcpus_pid_tid = BTreeMap::new();
4126 for _ in 0..vcpu_handles.len() {
4127 let vcpu_pid_tid: VcpuPidTid = vcpu_pid_tid_receiver
4128 .recv()
4129 .context("failed receiving vcpu pid/tid")?;
4130 if vcpus_pid_tid
4131 .insert(
4132 vcpu_pid_tid.vcpu_id,
4133 (vcpu_pid_tid.process_id, vcpu_pid_tid.thread_id),
4134 )
4135 .is_some()
4136 {
4137 return Err(anyhow!(
4138 "Vcpu {} returned more than 1 PID and TID",
4139 vcpu_pid_tid.vcpu_id
4140 ));
4141 }
4142 }
4143
4144 #[cfg(feature = "gdb")]
4145 if let Some((gdb_port_num, gdb_control_tube, from_vcpu_channel)) = gdb {
4147 let to_vcpu_channels = vcpu_handles
4148 .iter()
4149 .map(|(_handle, channel)| channel.clone())
4150 .collect();
4151 let target = GdbStub::new(gdb_control_tube, to_vcpu_channels, from_vcpu_channel);
4152 std::thread::Builder::new()
4153 .name("gdb".to_owned())
4154 .spawn(move || gdb_thread(target, gdb_port_num))
4155 .context("failed to spawn GDB thread")?;
4156 };
4157
4158 let (irq_handler_control, irq_handler_control_for_thread) = Tube::pair()?;
4159 let sys_allocator_for_thread = sys_allocator_mutex.clone();
4160 let irq_chip_for_thread = linux.irq_chip.try_box_clone()?;
4161 let irq_handler_thread = std::thread::Builder::new()
4162 .name("irq_handler_thread".into())
4163 .spawn(move || {
4164 irq_handler_thread(
4165 irq_control_tubes,
4166 irq_chip_for_thread,
4167 sys_allocator_for_thread,
4168 irq_handler_control_for_thread,
4169 )
4170 })
4171 .unwrap();
4172
4173 let (vm_memory_control_tube1, vm_memory_control_tube_2) = Tube::pair()?;
4174 vm_memory_control_tubes.push(VmMemoryTube {
4175 tube: vm_memory_control_tube1,
4176 expose_with_viommu: false,
4177 });
4178 let vm_memory_control_client = VmMemoryClient::new(vm_memory_control_tube_2);
4179 let (vm_memory_handler_control, vm_memory_handler_control_for_thread) = Tube::pair()?;
4180 let vm_memory_handler_thread = std::thread::Builder::new()
4181 .name("vm_memory_handler_thread".into())
4182 .spawn({
4183 let vm = linux.vm.try_clone().context("failed to clone Vm")?;
4184 let sys_allocator_mutex = sys_allocator_mutex.clone();
4185 let iommu_client = iommu_host_tube
4186 .as_ref()
4187 .map(|t| VmMemoryRequestIommuClient::new(t.clone()));
4188 move || {
4189 vm_memory_handler_thread(
4190 vm_memory_control_tubes,
4191 vm,
4192 sys_allocator_mutex,
4193 gralloc,
4194 iommu_client,
4195 vm_memory_handler_control_for_thread,
4196 )
4197 }
4198 })
4199 .unwrap();
4200
4201 vcpu_thread_barrier.wait();
4202
4203 let mut suspended_pvclock_state: Option<hypervisor::ClockState> = None;
4205
4206 if let Some(path) = &cfg.restore_path {
4209 vm_control::do_restore(
4210 path,
4211 |msg| vcpu::kick_all_vcpus(&vcpu_handles, linux.irq_chip.as_irq_chip(), msg),
4212 |msg, index| {
4213 vcpu::kick_vcpu(&vcpu_handles.get(index), linux.irq_chip.as_irq_chip(), msg)
4214 },
4215 &irq_handler_control,
4216 &device_ctrl_tube,
4217 linux.vcpu_count,
4218 |image| {
4219 linux
4220 .irq_chip
4221 .try_box_clone()?
4222 .restore(image, linux.vcpu_count)
4223 },
4224 false,
4225 &mut suspended_pvclock_state,
4226 &linux.vm,
4227 )?;
4228 vcpu::kick_all_vcpus(
4230 &vcpu_handles,
4231 linux.irq_chip.as_irq_chip(),
4232 VcpuControl::RunState(post_restore_run_mode),
4233 )
4234 }
4235
4236 #[cfg(feature = "swap")]
4237 if let Some(swap_controller) = &swap_controller {
4238 swap_controller
4239 .on_static_devices_setup_complete()
4240 .context("static device setup complete")?;
4241 }
4242
4243 let metrics_thread = if metrics::is_initialized() {
4244 Some(
4245 std::thread::Builder::new()
4246 .name("metrics_thread".into())
4247 .spawn(move || {
4248 if let Err(e) = MetricsController::new(vec![metrics_tube]).run() {
4249 error!("Metrics controller error: {:?}", e);
4250 }
4251 })
4252 .context("metrics thread failed")?,
4253 )
4254 } else {
4255 None
4256 };
4257
4258 let mut exit_state = ExitState::Stop;
4259 let mut pvpanic_code = PvPanicCode::Unknown;
4260 #[cfg(feature = "registered_events")]
4261 let mut registered_evt_tubes: HashMap<RegisteredEvent, HashSet<AddressedProtoTube>> =
4262 HashMap::new();
4263
4264 'wait: loop {
4265 let events = {
4266 match wait_ctx.wait() {
4267 Ok(v) => v,
4268 Err(e) => {
4269 error!("failed to poll: {}", e);
4270 break;
4271 }
4272 }
4273 };
4274
4275 let mut vm_control_ids_to_remove = Vec::new();
4276 for event in events.iter().filter(|e| e.is_readable) {
4277 match event.token {
4278 #[cfg(feature = "registered_events")]
4279 Token::RegisteredEvent => match reg_evt_rdtube.recv::<RegisteredEventWithData>() {
4280 Ok(reg_evt) => {
4281 let evt = reg_evt.into_event();
4282 let mut tubes_to_remove: Vec<String> = Vec::new();
4283 if let Some(tubes) = registered_evt_tubes.get_mut(&evt) {
4284 for tube in tubes.iter() {
4285 if let Err(e) = tube.send(®_evt.into_proto()) {
4286 warn!(
4287 "failed to send registered event {:?} to {}, removing from \
4288 registrations: {}",
4289 reg_evt, tube.socket_addr, e
4290 );
4291 tubes_to_remove.push(tube.socket_addr.clone());
4292 }
4293 }
4294 }
4295 for tube_addr in tubes_to_remove {
4296 for tubes in registered_evt_tubes.values_mut() {
4297 tubes.retain(|t| t.socket_addr != tube_addr);
4298 }
4299 }
4300 registered_evt_tubes.retain(|_, tubes| !tubes.is_empty());
4301 }
4302 Err(e) => {
4303 warn!("failed to recv RegisteredEvent: {}", e);
4304 }
4305 },
4306 Token::VmEvent => {
4307 let mut break_to_wait: bool = true;
4308 match vm_evt_rdtube.recv::<VmEventType>() {
4309 Ok(vm_event) => match vm_event {
4310 VmEventType::Exit => {
4311 info!("vcpu requested shutdown");
4312 exit_state = ExitState::Stop;
4313 }
4314 VmEventType::Reset => {
4315 info!("vcpu requested reset");
4316 exit_state = ExitState::Reset;
4317 }
4318 VmEventType::Crash => {
4319 info!("vcpu crashed");
4320 exit_state = ExitState::Crash;
4321 }
4322 VmEventType::GuestPanic => {
4323 info!("guest panic event");
4324 exit_state = ExitState::GuestPanic;
4325 }
4326 VmEventType::DeviceCrashed => {
4327 info!("device crashed");
4328 exit_state = ExitState::Crash;
4329 }
4330 VmEventType::Panic(panic_code) => {
4331 pvpanic_code = PvPanicCode::from_u8(panic_code);
4332 info!("Guest reported panic [Code: {}]", pvpanic_code);
4333 break_to_wait = false;
4334 }
4335 VmEventType::WatchdogReset => {
4336 info!("vcpu stall detected");
4337 exit_state = ExitState::WatchdogReset;
4338 }
4339 },
4340 Err(e) => {
4341 warn!("failed to recv VmEvent: {}", e);
4342 }
4343 }
4344 if break_to_wait {
4345 if pvpanic_code == PvPanicCode::Panicked {
4346 exit_state = ExitState::GuestPanic;
4347 }
4348 break 'wait;
4349 }
4350 }
4351 Token::Suspend => match linux.suspend_tube.1.recv::<bool>() {
4352 Ok(is_suspend_request) => {
4353 let mode = if is_suspend_request {
4354 VmRunMode::Suspending
4355 } else {
4356 for dev in &linux.resume_notify_devices {
4357 dev.lock().resume_imminent();
4358 }
4359 VmRunMode::Running
4360 };
4361 info!("VM requested {}", mode);
4362 vcpu::kick_all_vcpus(
4363 &vcpu_handles,
4364 linux.irq_chip.as_irq_chip(),
4365 VcpuControl::RunState(mode),
4366 );
4367 }
4368 Err(err) => {
4369 warn!("Failed to read suspend tube {:?}", err);
4370 }
4371 },
4372 Token::ChildSignal => {
4373 let mut do_exit = false;
4377 while let Some(siginfo) =
4378 sigchld_fd.read().context("failed to read signalfd")?
4379 {
4380 let pid = siginfo.ssi_pid;
4381 let pid_label = match linux.pid_debug_label_map.get(&pid) {
4382 Some(label) => format!("{label} (pid {pid})"),
4383 None => format!("pid {pid}"),
4384 };
4385
4386 #[cfg(feature = "swap")]
4388 if siginfo.ssi_code == libc::CLD_STOPPED
4389 || siginfo.ssi_code == libc::CLD_CONTINUED
4390 {
4391 continue;
4392 }
4393
4394 if cfg.jail_config.is_none()
4401 && !linux.pid_debug_label_map.contains_key(&pid)
4402 && siginfo.ssi_signo == libc::SIGCHLD as u32
4403 && siginfo.ssi_code == libc::CLD_EXITED
4404 && siginfo.ssi_status == 0
4405 {
4406 continue;
4407 }
4408
4409 if siginfo.ssi_signo == libc::SIGCHLD as u32
4411 && siginfo.ssi_code == libc::CLD_EXITED
4412 && siginfo.ssi_status == 0
4413 && worker_process_pids.remove(&(pid as Pid))
4414 {
4415 info!("child {pid} exited successfully");
4416 continue;
4417 }
4418
4419 if siginfo.ssi_signo == libc::SIGCHLD as u32
4420 && (siginfo.ssi_code == libc::CLD_KILLED
4421 || siginfo.ssi_code == libc::CLD_DUMPED)
4422 {
4423 error!(
4424 "child {} killed by signal {} ({})",
4425 pid_label,
4426 siginfo.ssi_status,
4427 base::signal::Signal::try_from(siginfo.ssi_status)
4428 .map(|s| s.to_string())
4429 .unwrap_or("unknown".to_string()),
4430 );
4431 } else {
4432 error!(
4433 "child {} exited: signo {}, status {}, code {}",
4434 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
4435 );
4436 }
4437 do_exit = true;
4438 }
4439 if do_exit {
4440 exit_state = ExitState::Crash;
4441 break 'wait;
4442 }
4443 }
4444 Token::VmControlServer => {
4445 if let Some(socket_server) = &control_server_socket {
4446 match socket_server.accept() {
4447 Ok(socket) => {
4448 let id = next_control_id;
4449 next_control_id += 1;
4450 wait_ctx
4451 .add(&socket, Token::VmControl { id })
4452 .context("failed to add descriptor to wait context")?;
4453 control_tubes
4454 .insert(id, TaggedControlTube::Vm(Tube::try_from(socket)?));
4455 }
4456 Err(e) => error!("failed to accept socket: {}", e),
4457 }
4458 }
4459 }
4460 Token::VmControl { id } => {
4461 if let Some(socket) = control_tubes.get(&id) {
4462 let mut state = ControlLoopState {
4463 linux: &mut linux,
4464 cfg: &cfg,
4465 sys_allocator: &sys_allocator_mutex,
4466 control_tubes: &control_tubes,
4467 disk_host_tubes: &disk_host_tubes[..],
4468 #[cfg(feature = "audio")]
4469 snd_host_tubes: &snd_host_tubes[..],
4470 #[cfg(feature = "gpu")]
4471 gpu_control_tube: gpu_control_tube.as_ref(),
4472 #[cfg(feature = "usb")]
4473 usb_control_tube: &usb_control_tube,
4474 #[cfg(target_arch = "x86_64")]
4475 iommu_host_tube: &iommu_host_tube,
4476 #[cfg(target_arch = "x86_64")]
4477 hp_control_tube: &hp_control_tube,
4478 guest_suspended_cvar: &guest_suspended_cvar,
4479 #[cfg(feature = "pci-hotplug")]
4480 hotplug_manager: &mut hotplug_manager,
4481 #[cfg(feature = "swap")]
4482 swap_controller: &mut swap_controller,
4483 vcpu_handles: &vcpu_handles,
4484 #[cfg(feature = "balloon")]
4485 balloon_tube: balloon_tube.as_mut(),
4486 device_ctrl_tube: &device_ctrl_tube,
4487 irq_handler_control: &irq_handler_control,
4488 #[cfg(any(target_arch = "x86_64", feature = "pci-hotplug"))]
4489 vm_memory_handler_control: &vm_memory_handler_control,
4490 #[cfg(feature = "registered_events")]
4491 registered_evt_tubes: &mut registered_evt_tubes,
4492 #[cfg(feature = "pvclock")]
4493 pvclock_host_tube: pvclock_host_tube.clone(),
4494 vfio_container_manager: &mut vfio_container_manager,
4495 suspended_pvclock_state: &mut suspended_pvclock_state,
4496 vcpus_pid_tid: &vcpus_pid_tid,
4497 vm_memory_control_client: &vm_memory_control_client,
4498 };
4499 let (exit_requested, mut ids_to_remove, add_tubes) =
4500 process_vm_control_event(&mut state, id, socket)?;
4501 if exit_requested {
4502 break 'wait;
4503 }
4504 vm_control_ids_to_remove.append(&mut ids_to_remove);
4505 for socket in add_tubes {
4506 let id = next_control_id;
4507 next_control_id += 1;
4508 wait_ctx
4509 .add(socket.as_ref(), Token::VmControl { id })
4510 .context(
4511 "failed to add hotplug vfio-pci descriptor to wait context",
4512 )?;
4513 control_tubes.insert(id, socket);
4514 }
4515 }
4516 }
4517 #[cfg(feature = "balloon")]
4518 Token::BalloonTube => {
4519 match balloon_tube.as_mut().expect("missing balloon tube").recv() {
4520 Ok(resp) => {
4521 for (resp, idx) in resp {
4522 if let Some(TaggedControlTube::Vm(tube)) = control_tubes.get(&idx) {
4523 if let Err(e) = tube.send(&resp) {
4524 error!("failed to send VmResponse: {}", e);
4525 }
4526 } else {
4527 error!("Bad tube index {}", idx);
4528 }
4529 }
4530 }
4531 Err(err) => {
4532 error!("Error processing balloon tube {:?}", err)
4533 }
4534 }
4535 }
4536 }
4537 }
4538
4539 remove_hungup_and_drained_tubes(
4540 &events,
4541 &wait_ctx,
4542 &mut control_tubes,
4543 vm_control_ids_to_remove,
4544 |token: &Token| {
4545 if let Token::VmControl { id } = token {
4546 return Some(*id);
4547 }
4548 None
4549 },
4550 )?;
4551 }
4552
4553 vcpu::kick_all_vcpus(
4554 &vcpu_handles,
4555 linux.irq_chip.as_irq_chip(),
4556 VcpuControl::RunState(VmRunMode::Exiting),
4557 );
4558 for (handle, _) in vcpu_handles {
4559 if let Err(e) = handle.join() {
4560 error!("failed to join vcpu thread: {:?}", e);
4561 }
4562 }
4563
4564 if let Err(e) = vcpu::remove_vcpu_signal_handler() {
4566 error!("failed to remove vcpu thread signal handler: {:#}", e);
4567 }
4568
4569 #[cfg(feature = "swap")]
4571 drop(swap_controller);
4572
4573 #[cfg(target_arch = "x86_64")]
4575 {
4576 let _ = hp_control_tube.send(PciRootCommand::Kill);
4577 if let Err(e) = hp_thread.join() {
4578 error!("failed to join hotplug thread: {:?}", e);
4579 }
4580 }
4581
4582 if linux.devices_thread.is_some() {
4583 if let Err(e) = device_ctrl_tube.send(&DeviceControlCommand::Exit) {
4584 error!("failed to stop device control loop: {}", e);
4585 };
4586 if let Some(thread) = linux.devices_thread.take() {
4587 if let Err(e) = thread.join() {
4588 error!("failed to exit devices thread: {:?}", e);
4589 }
4590 }
4591 }
4592
4593 match Arc::try_unwrap(std::mem::replace(
4598 &mut linux.mmio_bus,
4599 Arc::new(Bus::new(BusType::Mmio)),
4600 )) {
4601 Ok(_) => {}
4602 Err(_) => panic!("internal error: mmio_bus had more than one reference at shutdown"),
4603 }
4604 match Arc::try_unwrap(std::mem::replace(
4605 &mut linux.io_bus,
4606 Arc::new(Bus::new(BusType::Io)),
4607 )) {
4608 Ok(_) => {}
4609 Err(_) => panic!("internal error: io_bus had more than one reference at shutdown"),
4610 }
4611
4612 mem::drop(linux);
4615
4616 if let Err(e) = vm_memory_handler_control.send(&VmMemoryHandlerRequest::Exit) {
4621 error!(
4622 "failed to request exit from VM Memory handler thread: {}",
4623 e
4624 );
4625 }
4626 if let Err(e) = vm_memory_handler_thread.join() {
4627 error!("failed to exit VM Memory handler thread: {:?}", e);
4628 }
4629
4630 if let Err(e) = irq_handler_control.send(&IrqHandlerRequest::Exit) {
4632 error!("failed to request exit from IRQ handler thread: {}", e);
4633 }
4634 if let Err(e) = irq_handler_thread.join() {
4635 error!("failed to exit irq handler thread: {:?}", e);
4636 }
4637
4638 #[cfg(feature = "pci-hotplug")]
4641 mem::drop(hotplug_manager);
4642
4643 metrics::get_destructor().cleanup();
4646 if let Some(metrics_thread) = metrics_thread {
4647 if let Err(e) = metrics_thread.join() {
4648 error!("failed to exit irq handler thread: {:?}", e);
4649 }
4650 }
4651
4652 stdin()
4653 .set_canon_mode()
4654 .expect("failed to restore canonical mode for terminal");
4655
4656 Ok(exit_state)
4657}
4658
/// Event tokens registered with the IRQ handler thread's wait context.
#[derive(EventToken)]
enum IrqHandlerToken {
    /// An IRQ chip event token; `index` is what gets passed to `service_irq_event`.
    IrqFd { index: IrqEventIndex },
    /// An IRQ control tube carrying `VmIrqRequest`s; `id` is its key in the handler's tube map.
    VmIrq { id: usize },
    /// The IRQ chip's optional delayed-event token, serviced via `process_delayed_irq_events`.
    DelayedIrqFd,
    /// The tube over which the handler thread receives `IrqHandlerRequest`s.
    HandlerControl,
}
4666
4667fn irq_handler_thread(
4669 irq_control_tubes: Vec<Tube>,
4670 mut irq_chip: Box<dyn IrqChipArch + 'static>,
4671 sys_allocator_mutex: Arc<Mutex<SystemAllocator>>,
4672 handler_control: Tube,
4673) -> anyhow::Result<()> {
4674 let wait_ctx = WaitContext::build_with(&[(
4675 handler_control.get_read_notifier(),
4676 IrqHandlerToken::HandlerControl,
4677 )])
4678 .context("failed to build wait context")?;
4679
4680 if let Some(delayed_ioapic_irq_trigger) = irq_chip.irq_delayed_event_token()? {
4681 wait_ctx
4682 .add(&delayed_ioapic_irq_trigger, IrqHandlerToken::DelayedIrqFd)
4683 .context("failed to add descriptor to wait context")?;
4684 }
4685
4686 let mut irq_event_tokens = irq_chip
4687 .irq_event_tokens()
4688 .context("failed get event tokens from irqchip")?;
4689
4690 for (index, _gsi, evt) in irq_event_tokens.iter() {
4691 wait_ctx
4692 .add(evt, IrqHandlerToken::IrqFd { index: *index })
4693 .context("failed to add irq chip event tokens to wait context")?;
4694 }
4695
4696 let mut irq_control_tubes = BTreeMap::from_iter(irq_control_tubes.into_iter().enumerate());
4697 let mut next_control_id = irq_control_tubes.len();
4698 for (id, socket) in irq_control_tubes.iter() {
4699 wait_ctx
4700 .add(
4701 socket.get_read_notifier(),
4702 IrqHandlerToken::VmIrq { id: *id },
4703 )
4704 .context("irq control tubes to wait context")?;
4705 }
4706
4707 'wait: loop {
4708 let events = {
4709 match wait_ctx.wait() {
4710 Ok(v) => v,
4711 Err(e) => {
4712 error!("failed to poll: {}", e);
4713 break 'wait;
4714 }
4715 }
4716 };
4717 let token_count = events.len();
4718 let mut vm_irq_tubes_to_remove = Vec::new();
4719 let mut notify_control_on_iteration_end = false;
4720
4721 for event in events.iter().filter(|e| e.is_readable) {
4722 match event.token {
4723 IrqHandlerToken::HandlerControl => {
4724 match handler_control.recv::<IrqHandlerRequest>() {
4725 Ok(request) => {
4726 match request {
4727 IrqHandlerRequest::Exit => break 'wait,
4728 IrqHandlerRequest::AddIrqControlTubes(tubes) => {
4729 for socket in tubes {
4730 let id = next_control_id;
4731 next_control_id += 1;
4732 wait_ctx
4733 .add(
4734 socket.get_read_notifier(),
4735 IrqHandlerToken::VmIrq { id },
4736 )
4737 .context("failed to add new IRQ control Tube to wait context")?;
4738 irq_control_tubes.insert(id, socket);
4739 }
4740 }
4741 IrqHandlerRequest::RefreshIrqEventTokens => {
4742 for (_index, _gsi, evt) in irq_event_tokens.iter() {
4743 wait_ctx.delete(evt).context(
4744 "failed to remove irq chip event \
4745 token from wait context",
4746 )?;
4747 }
4748
4749 irq_event_tokens = irq_chip
4750 .irq_event_tokens()
4751 .context("failed get event tokens from irqchip")?;
4752 for (index, _gsi, evt) in irq_event_tokens.iter() {
4753 wait_ctx
4754 .add(evt, IrqHandlerToken::IrqFd { index: *index })
4755 .context(
4756 "failed to add irq chip event \
4757 tokens to wait context",
4758 )?;
4759 }
4760
4761 if let Err(e) = handler_control
4762 .send(&IrqHandlerResponse::IrqEventTokenRefreshComplete)
4763 {
4764 error!(
4765 "failed to notify IRQ event token refresh \
4766 was completed: {}",
4767 e
4768 );
4769 }
4770 }
4771 IrqHandlerRequest::WakeAndNotifyIteration => {
4772 notify_control_on_iteration_end = true;
4773 }
4774 }
4775 }
4776 Err(e) => {
4777 if let TubeError::Disconnected = e {
4778 panic!("irq handler control tube disconnected.");
4779 } else {
4780 error!("failed to recv IrqHandlerRequest: {}", e);
4781 }
4782 }
4783 }
4784 }
4785 IrqHandlerToken::VmIrq { id } => {
4786 if let Some(tube) = irq_control_tubes.get(&id) {
4787 handle_irq_tube_request(
4788 &sys_allocator_mutex,
4789 &mut irq_chip,
4790 &mut vm_irq_tubes_to_remove,
4791 &wait_ctx,
4792 tube,
4793 id,
4794 );
4795 }
4796 }
4797 IrqHandlerToken::IrqFd { index } => {
4798 if let Err(e) = irq_chip.service_irq_event(index) {
4799 error!("failed to signal irq {}: {}", index, e);
4800 }
4801 }
4802 IrqHandlerToken::DelayedIrqFd => {
4803 if let Err(e) = irq_chip.process_delayed_irq_events() {
4804 warn!("can't deliver delayed irqs: {}", e);
4805 }
4806 }
4807 }
4808 }
4809
4810 if notify_control_on_iteration_end {
4811 if let Err(e) = handler_control.send(&IrqHandlerResponse::HandlerIterationComplete(
4812 token_count - 1,
4813 )) {
4814 error!(
4815 "failed to notify on iteration completion (snapshotting may fail): {}",
4816 e
4817 );
4818 }
4819 }
4820
4821 remove_hungup_and_drained_tubes(
4822 &events,
4823 &wait_ctx,
4824 &mut irq_control_tubes,
4825 vm_irq_tubes_to_remove,
4826 |token: &IrqHandlerToken| {
4827 if let IrqHandlerToken::VmIrq { id } = token {
4828 return Some(*id);
4829 }
4830 None
4831 },
4832 )?;
4833 if events.iter().any(|e| {
4834 e.is_hungup && !e.is_readable && matches!(e.token, IrqHandlerToken::HandlerControl)
4835 }) {
4836 error!("IRQ handler control hung up but did not request an exit.");
4837 break 'wait;
4838 }
4839 }
4840 Ok(())
4841}
4842
4843fn handle_irq_tube_request(
4844 sys_allocator_mutex: &Arc<Mutex<SystemAllocator>>,
4845 irq_chip: &mut Box<dyn IrqChipArch + 'static>,
4846 vm_irq_tubes_to_remove: &mut Vec<usize>,
4847 wait_ctx: &WaitContext<IrqHandlerToken>,
4848 tube: &Tube,
4849 tube_index: usize,
4850) {
4851 match tube.recv::<VmIrqRequest>() {
4852 Ok(request) => {
4853 let response = {
4854 request.execute(
4855 |setup| match setup {
4856 IrqSetup::Event(irq, ev, device_id, queue_id, device_name) => {
4857 let irq_evt = devices::IrqEdgeEvent::from_event(ev.try_clone()?);
4858 let source = IrqEventSource {
4859 device_id,
4860 queue_id,
4861 device_name,
4862 };
4863 if let Some(event_index) =
4864 irq_chip.register_edge_irq_event(irq, &irq_evt, source)?
4865 {
4866 if let Err(e) =
4867 wait_ctx.add(ev, IrqHandlerToken::IrqFd { index: event_index })
4868 {
4869 warn!("failed to add IrqFd to poll context: {}", e);
4870 return Err(e);
4871 }
4872 }
4873 Ok(())
4874 }
4875 IrqSetup::Route(route) => irq_chip.route_irq(route),
4876 IrqSetup::UnRegister(irq, ev) => {
4877 let irq_evt = devices::IrqEdgeEvent::from_event(ev.try_clone()?);
4878 irq_chip.unregister_edge_irq_event(irq, &irq_evt)
4879 }
4880 },
4881 &mut sys_allocator_mutex.lock(),
4882 )
4883 };
4884 if let Err(e) = tube.send(&response) {
4885 error!("failed to send VmIrqResponse: {}", e);
4886 }
4887 }
4888 Err(e) => {
4889 if let TubeError::Disconnected = e {
4890 vm_irq_tubes_to_remove.push(tube_index);
4891 } else {
4892 error!("failed to recv VmIrqRequest: {}", e);
4893 }
4894 }
4895 }
4896}
4897
/// Commands accepted by the VM memory handler thread on its control tube.
#[derive(serde::Serialize, serde::Deserialize)]
pub enum VmMemoryHandlerRequest {
    /// Registers additional memory control tubes with the handler. No response is sent.
    AddControlTubes(Vec<VmMemoryTube>),
    /// Makes the handler thread leave its event loop. No response is sent.
    Exit,
}
4906
4907fn vm_memory_handler_thread(
4908 control_tubes: Vec<VmMemoryTube>,
4909 mut vm: impl Vm,
4910 sys_allocator_mutex: Arc<Mutex<SystemAllocator>>,
4911 mut gralloc: RutabagaGralloc,
4912 mut iommu_client: Option<VmMemoryRequestIommuClient>,
4913 handler_control: Tube,
4914) -> anyhow::Result<()> {
4915 #[derive(EventToken)]
4916 enum Token {
4917 VmControl { id: usize },
4918 HandlerControl,
4919 }
4920
4921 let wait_ctx =
4922 WaitContext::build_with(&[(handler_control.get_read_notifier(), Token::HandlerControl)])
4923 .context("failed to build wait context")?;
4924 let mut control_tubes = BTreeMap::from_iter(control_tubes.into_iter().enumerate());
4925 let mut next_control_id = control_tubes.len();
4926 for (id, socket) in control_tubes.iter() {
4927 wait_ctx
4928 .add(socket.as_ref(), Token::VmControl { id: *id })
4929 .context("failed to add descriptor to wait context")?;
4930 }
4931
4932 let mut region_state: VmMemoryRegionState = Default::default();
4933
4934 'wait: loop {
4935 let events = {
4936 match wait_ctx.wait() {
4937 Ok(v) => v,
4938 Err(e) => {
4939 error!("failed to poll: {}", e);
4940 break;
4941 }
4942 }
4943 };
4944
4945 let mut vm_control_ids_to_remove = Vec::new();
4946 for event in events.iter().filter(|e| e.is_readable) {
4947 match event.token {
4948 Token::HandlerControl => match handler_control.recv::<VmMemoryHandlerRequest>() {
4949 Ok(request) => match request {
4950 VmMemoryHandlerRequest::Exit => break 'wait,
4951 VmMemoryHandlerRequest::AddControlTubes(tubes) => {
4952 for socket in tubes {
4953 let id = next_control_id;
4954 next_control_id += 1;
4955 wait_ctx
4956 .add(socket.get_read_notifier(), Token::VmControl { id })
4957 .context(
4958 "failed to add new vm memory control Tube to wait context",
4959 )?;
4960 control_tubes.insert(id, socket);
4961 }
4962 }
4963 },
4964 Err(e) => {
4965 if let TubeError::Disconnected = e {
4966 panic!("vm memory control tube disconnected.");
4967 } else {
4968 error!("failed to recv VmMemoryHandlerRequest: {}", e);
4969 }
4970 }
4971 },
4972 Token::VmControl { id } => {
4973 if let Some(VmMemoryTube {
4974 tube,
4975 expose_with_viommu,
4976 }) = control_tubes.get(&id)
4977 {
4978 match tube.recv::<VmMemoryRequest>() {
4979 Ok(request) => {
4980 let response = request.execute(
4981 tube,
4982 &mut vm,
4983 &mut sys_allocator_mutex.lock(),
4984 &mut gralloc,
4985 if *expose_with_viommu {
4986 iommu_client.as_mut()
4987 } else {
4988 None
4989 },
4990 &mut region_state,
4991 );
4992 if let Err(e) = tube.send(&response) {
4993 error!("failed to send VmMemoryControlResponse: {}", e);
4994 }
4995 }
4996 Err(e) => {
4997 if let TubeError::Disconnected = e {
4998 vm_control_ids_to_remove.push(id);
4999 } else {
5000 error!("failed to recv VmMemoryControlRequest: {}", e);
5001 }
5002 }
5003 }
5004 }
5005 }
5006 }
5007 }
5008
5009 remove_hungup_and_drained_tubes(
5010 &events,
5011 &wait_ctx,
5012 &mut control_tubes,
5013 vm_control_ids_to_remove,
5014 |token: &Token| {
5015 if let Token::VmControl { id } = token {
5016 return Some(*id);
5017 }
5018 None
5019 },
5020 )?;
5021 if events
5022 .iter()
5023 .any(|e| e.is_hungup && !e.is_readable && matches!(e.token, Token::HandlerControl))
5024 {
5025 error!("vm memory handler control hung up but did not request an exit.");
5026 break 'wait;
5027 }
5028 }
5029 Ok(())
5030}
5031
5032fn remove_hungup_and_drained_tubes<T, U>(
5039 events: &SmallVec<[TriggeredEvent<T>; 16]>,
5040 wait_ctx: &WaitContext<T>,
5041 tubes: &mut BTreeMap<usize, U>,
5042 mut tube_ids_to_remove: Vec<usize>,
5043 get_tube_id: fn(token: &T) -> Option<usize>,
5044) -> anyhow::Result<()>
5045where
5046 T: EventToken,
5047 U: ReadNotifier,
5048{
5049 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
5058 if let Some(id) = get_tube_id(&event.token) {
5059 tube_ids_to_remove.push(id);
5060 }
5061 }
5062
5063 tube_ids_to_remove.dedup();
5064 for id in tube_ids_to_remove {
5065 if let Some(socket) = tubes.remove(&id) {
5074 wait_ctx
5075 .delete(socket.get_read_notifier())
5076 .context("failed to remove descriptor from wait context")?;
5077 }
5078 }
5079 Ok(())
5080}
5081
5082fn jail_and_start_vu_device<T: VirtioDeviceBuilder>(
5089 jail_config: Option<&JailConfig>,
5090 params: T,
5091 vhost: &str,
5092 name: &str,
5093) -> anyhow::Result<(libc::pid_t, Option<Box<dyn std::any::Any>>)> {
5094 let mut keep_rds = Vec::new();
5095
5096 base::syslog::push_descriptors(&mut keep_rds);
5097 cros_tracing::push_descriptors!(&mut keep_rds);
5098 metrics::push_descriptors(&mut keep_rds);
5099
5100 let jail_type = VirtioDeviceType::VhostUser;
5101
5102 let jail = params
5105 .create_jail(jail_config, jail_type)
5106 .with_context(|| format!("failed to create jail for {name}"))?
5107 .ok_or(())
5108 .or_else(|_| Minijail::new())
5109 .with_context(|| format!("failed to create empty jail for {name}"))?;
5110
5111 let device = params
5114 .create_vhost_user_device(&mut keep_rds)
5115 .context("failed to create vhost-user device")?;
5116 let mut listener =
5117 VhostUserListener::new(vhost).context("failed to create the vhost listener")?;
5118 keep_rds.push(listener.as_raw_descriptor());
5119 let parent_resources = listener.take_parent_process_resources();
5120
5121 let ex = Executor::new().context("Failed to create an Executor")?;
5124 keep_rds.extend(ex.as_raw_descriptors());
5125
5126 keep_rds.sort_unstable();
5128 keep_rds.dedup();
5129
5130 match unsafe { jail.fork(Some(&keep_rds)).context("error while forking")? } {
5133 0 => {
5134 let _ = std::mem::ManuallyDrop::new(parent_resources);
5139
5140 if unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) } < 0 {
5143 panic!("call to prctl(PR_SET_DEATHSIG, SIGKILL) failed. Aborting child process.");
5144 }
5145
5146 const MAX_LEN: usize = 15; let debug_label_trimmed = &name.as_bytes()[..std::cmp::min(MAX_LEN, name.len())];
5149 let thread_name = CString::new(debug_label_trimmed).unwrap();
5150 let _ = unsafe { libc::pthread_setname_np(libc::pthread_self(), thread_name.as_ptr()) };
5154
5155 let res = match listener.run_device(ex, device) {
5157 Ok(()) => 0,
5158 Err(e) => {
5159 error!("error while running device {}: {:#}", name, e);
5160 1
5161 }
5162 };
5163 unsafe { libc::exit(res) };
5165 }
5166 pid => {
5167 info!("process for device {} (PID {}) started", &name, pid);
5173 #[cfg(feature = "seccomp_trace")]
5174 debug!(
5175 "seccomp_trace {{\"event\": \"minijail_fork\", \"pid\": {}, \"name\": \"{}\", \"jail_addr\": \"0x{:x}\"}}",
5176 pid,
5177 &name,
5178 read_jail_addr(&jail)
5179 );
5180 Ok((pid, parent_resources))
5181 }
5182 }
5183}
5184
5185fn process_vhost_user_control_request(tube: Tube, disk_host_tubes: &[Tube]) -> Result<()> {
5186 let command = tube
5187 .recv::<VmRequest>()
5188 .context("failed to receive VmRequest")?;
5189 let resp = match command {
5190 VmRequest::DiskCommand {
5191 disk_index,
5192 ref command,
5193 } => match &disk_host_tubes.get(disk_index) {
5194 Some(tube) => handle_disk_command(command, tube),
5195 None => VmResponse::Err(base::Error::new(libc::ENODEV)),
5196 },
5197 request => {
5198 error!(
5199 "Request {:?} currently not supported in vhost user backend",
5200 request
5201 );
5202 VmResponse::Err(base::Error::new(libc::EPERM))
5203 }
5204 };
5205
5206 tube.send(&resp).context("failed to send VmResponse")?;
5207 Ok(())
5208}
5209
5210fn start_vhost_user_control_server(
5211 control_server_socket: UnlinkUnixSeqpacketListener,
5212 disk_host_tubes: Vec<Tube>,
5213) {
5214 info!("Start vhost-user control server");
5215 loop {
5216 match control_server_socket.accept() {
5217 Ok(socket) => {
5218 let tube = match Tube::try_from(socket) {
5219 Ok(tube) => tube,
5220 Err(e) => {
5221 error!("failed to open tube: {:#}", e);
5222 return;
5223 }
5224 };
5225 if let Err(e) = process_vhost_user_control_request(tube, &disk_host_tubes) {
5226 error!("failed to process control request: {:#}", e);
5227 }
5228 }
5229 Err(e) => {
5230 error!("failed to establish connection: {}", e);
5231 }
5232 }
5233 }
5234}
5235
5236pub fn start_devices(opts: DevicesCommand) -> anyhow::Result<()> {
5237 if let Some(async_executor) = opts.async_executor {
5238 Executor::set_default_executor_kind(async_executor)
5239 .context("Failed to set the default async executor")?;
5240 }
5241
5242 struct DeviceJailInfo {
5243 name: String,
5245 _drop_resources: Option<Box<dyn std::any::Any>>,
5246 }
5247
5248 fn add_device<T: VirtioDeviceBuilder>(
5249 i: usize,
5250 device_params: T,
5251 vhost: &str,
5252 jail_config: Option<&JailConfig>,
5253 devices_jails: &mut BTreeMap<libc::pid_t, DeviceJailInfo>,
5254 ) -> anyhow::Result<()> {
5255 let name = format!("{}-{}", T::NAME, i);
5256
5257 let (pid, _drop_resources) =
5258 jail_and_start_vu_device::<T>(jail_config, device_params, vhost, &name)?;
5259
5260 devices_jails.insert(
5261 pid,
5262 DeviceJailInfo {
5263 name,
5264 _drop_resources,
5265 },
5266 );
5267
5268 Ok(())
5269 }
5270
5271 let mut devices_jails: BTreeMap<libc::pid_t, DeviceJailInfo> = BTreeMap::new();
5272
5273 let jail = if opts.disable_sandbox {
5274 None
5275 } else {
5276 Some(&opts.jail)
5277 };
5278
5279 let control_server_socket = opts.control_socket.map(|path| {
5281 UnlinkUnixSeqpacketListener(
5282 UnixSeqpacketListener::bind(path).expect("Could not bind socket"),
5283 )
5284 });
5285
5286 for (i, params) in opts.serial.iter().enumerate() {
5288 let serial_config = ¶ms.device;
5289 add_device(i, serial_config, ¶ms.vhost, jail, &mut devices_jails)?;
5290 }
5291
5292 let mut disk_host_tubes = Vec::new();
5293 let control_socket_exists = control_server_socket.is_some();
5294 for (i, params) in opts.block.iter().enumerate() {
5296 let tube = if control_socket_exists {
5297 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
5298 disk_host_tubes.push(host_tube);
5299 Some(device_tube)
5300 } else {
5301 None
5302 };
5303 let disk_config = DiskConfig::new(¶ms.device, tube);
5304 add_device(i, disk_config, ¶ms.vhost, jail, &mut devices_jails)?;
5305 }
5306
5307 for (i, params) in opts.vsock.iter().enumerate() {
5309 add_device(i, ¶ms.device, ¶ms.vhost, jail, &mut devices_jails)?;
5310 }
5311
5312 #[cfg(feature = "net")]
5314 for (i, params) in opts.net.iter().enumerate() {
5315 add_device(i, ¶ms.device, ¶ms.vhost, jail, &mut devices_jails)?;
5316 }
5317
5318 if devices_jails.is_empty() {
5320 let err = DevicesCommand::from_args(
5321 &[&std::env::args().next().unwrap_or(String::from("crosvm"))],
5322 &["--help"],
5323 )
5324 .unwrap_err();
5325 println!("{}", err.output);
5326 return Ok(());
5327 }
5328
5329 if let Some(control_server_socket) = control_server_socket {
5330 std::thread::spawn(move || {
5332 start_vhost_user_control_server(control_server_socket, disk_host_tubes)
5333 });
5334 }
5335
5336 while !devices_jails.is_empty() {
5338 match base::linux::wait_for_pid(-1, 0) {
5339 Err(e) => panic!("error waiting for child process to complete: {e:#}"),
5340 Ok((Some(pid), wait_status)) => match devices_jails.remove_entry(&pid) {
5341 Some((_, info)) => {
5342 if let Some(status) = wait_status.code() {
5343 info!(
5344 "process for device {} (PID {}) exited with code {}",
5345 &info.name, pid, status
5346 );
5347 } else if let Some(signal) = wait_status.signal() {
5348 warn!(
5349 "process for device {} (PID {}) has been killed by signal {:?}",
5350 &info.name, pid, signal,
5351 );
5352 }
5353 }
5354 None => error!("pid {} is not one of our device processes", pid),
5355 },
5356 Ok((None, _)) => unreachable!(),
5359 }
5360 }
5361
5362 info!("all device processes have exited");
5363
5364 Ok(())
5365}
5366
/// Sets up crash reporting with the product type `"emulator"` and every other attribute left
/// unset, returning whatever `crash_report::setup_crash_reporting` returns.
///
/// The configuration argument is currently unused.
#[cfg(feature = "crash-report")]
pub fn setup_emulator_crash_reporting(_cfg: &Config) -> anyhow::Result<String> {
    let attributes = crash_report::CrashReportAttributes {
        product_type: String::from("emulator"),
        pipe_name: None,
        report_uuid: None,
        product_name: None,
        product_version: None,
    };
    crash_report::setup_crash_reporting(attributes)
}
5379
5380#[cfg(test)]
5381mod tests {
5382 use std::path::PathBuf;
5383
5384 use arch::CpuSet;
5385 use vm_memory::MemoryRegionPurpose;
5386
5387 use super::*;
5388
    /// Builds the file-backed mapping used as test input: a RAM mapping (`ram: true`) of `size`
    /// bytes at guest address `address`, with an empty path, zero offset and every other flag
    /// off.
    fn test_file_backed_mapping(address: u64, size: u64) -> FileBackedMappingParameters {
        FileBackedMappingParameters {
            address,
            size,
            path: PathBuf::new(),
            offset: 0,
            writable: false,
            sync: false,
            align: false,
            ram: true,
        }
    }
5403
    /// Exercises `punch_holes_in_guest_mem_layout_for_mappings` with RAM file-backed mappings
    /// placed at different positions relative to the guest memory regions.
    #[test]
    fn guest_mem_file_backed_mappings_overlap() {
        // No file-backed mappings: the layout is returned unchanged.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[]
            )
            .unwrap(),
            vec![
                (GuestAddress(0), 0xD000_0000, Default::default()),
                (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
            ],
        );

        // Mapping that starts right after the first region, inside no region: rejected.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0xD000_0000, 0x1000)]
            )
            .unwrap_err()
            .to_string(),
            "RAM file-backed-mapping must be a subset of a RAM region",
        );

        // Mapping at the start of the first region.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0, 0x2000)]
            )
            .unwrap(),
            vec![
                (
                    GuestAddress(0),
                    0x2000,
                    MemoryRegionOptions::new()
                        .purpose(MemoryRegionPurpose::GuestMemoryRegion)
                        .file_backed(test_file_backed_mapping(0, 0x2000)),
                ),
                (
                    GuestAddress(0x2000),
                    0xD000_0000 - 0x2000,
                    Default::default()
                ),
                (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
            ],
        );

        // Mapping at the end of the first region.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0xD000_0000 - 0x2000, 0x2000)]
            )
            .unwrap(),
            vec![
                (GuestAddress(0), 0xD000_0000 - 0x2000, Default::default()),
                (
                    GuestAddress(0xD000_0000 - 0x2000),
                    0x2000,
                    MemoryRegionOptions::new()
                        .purpose(MemoryRegionPurpose::GuestMemoryRegion)
                        .file_backed(test_file_backed_mapping(0xD000_0000 - 0x2000, 0x2000)),
                ),
                (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
            ],
        );

        // Mapping in the middle of the first region.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0x1000, 0x2000)]
            )
            .unwrap(),
            vec![
                (GuestAddress(0), 0x1000, Default::default()),
                (
                    GuestAddress(0x1000),
                    0x2000,
                    MemoryRegionOptions::new()
                        .purpose(MemoryRegionPurpose::GuestMemoryRegion)
                        .file_backed(test_file_backed_mapping(0x1000, 0x2000)),
                ),
                (
                    GuestAddress(0x3000),
                    0xD000_0000 - 0x3000,
                    Default::default()
                ),
                (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
            ],
        );

        // Mapping at the start of the second region.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0x1_0000_0000, 0x2000)]
            )
            .unwrap(),
            vec![
                (GuestAddress(0), 0xD000_0000, Default::default()),
                (
                    GuestAddress(0x1_0000_0000),
                    0x2000,
                    MemoryRegionOptions::new()
                        .purpose(MemoryRegionPurpose::GuestMemoryRegion)
                        .file_backed(test_file_backed_mapping(0x1_0000_0000, 0x2000)),
                ),
                (
                    GuestAddress(0x1_0000_2000),
                    0x8_0000 - 0x2000,
                    Default::default()
                ),
            ],
        );

        // Mapping at the end of the second region.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0x1_0008_0000 - 0x2000, 0x2000)]
            )
            .unwrap(),
            vec![
                (GuestAddress(0), 0xD000_0000, Default::default()),
                (
                    GuestAddress(0x1_0000_0000),
                    0x8_0000 - 0x2000,
                    Default::default()
                ),
                (
                    GuestAddress(0x1_0008_0000 - 0x2000),
                    0x2000,
                    MemoryRegionOptions::new()
                        .purpose(MemoryRegionPurpose::GuestMemoryRegion)
                        .file_backed(test_file_backed_mapping(0x1_0008_0000 - 0x2000, 0x2000)),
                ),
            ],
        );

        // Mapping in the middle of the second region.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0x1_0000_1000, 0x2000)]
            )
            .unwrap(),
            vec![
                (GuestAddress(0), 0xD000_0000, Default::default()),
                (GuestAddress(0x1_0000_0000), 0x1000, Default::default()),
                (
                    GuestAddress(0x1_0000_1000),
                    0x2000,
                    MemoryRegionOptions::new()
                        .purpose(MemoryRegionPurpose::GuestMemoryRegion)
                        .file_backed(test_file_backed_mapping(0x1_0000_1000, 0x2000)),
                ),
                (
                    GuestAddress(0x1_0000_3000),
                    0x8_0000 - 0x3000,
                    Default::default()
                ),
            ],
        );

        // Mapping that starts in the first region and ends in the second one: rejected.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0), 0xD000_0000, Default::default()),
                    (GuestAddress(0x1_0000_0000), 0x8_0000, Default::default()),
                ],
                &[test_file_backed_mapping(0xA000_0000, 0x60002000)]
            )
            .unwrap_err()
            .to_string(),
            "RAM file-backed-mapping must be a subset of a RAM region",
        );

        // Mapping that exactly covers a region with a non-default purpose: the purpose is kept.
        assert_eq!(
            punch_holes_in_guest_mem_layout_for_mappings(
                vec![
                    (GuestAddress(0x0000), 0x2000, Default::default()),
                    (
                        GuestAddress(0x2000),
                        0x2000,
                        MemoryRegionOptions::new().purpose(MemoryRegionPurpose::Bios)
                    ),
                ],
                &[test_file_backed_mapping(0x2000, 0x2000)]
            )
            .unwrap(),
            vec![
                (GuestAddress(0x0000), 0x2000, Default::default()),
                (
                    GuestAddress(0x2000),
                    0x2000,
                    MemoryRegionOptions::new()
                        .purpose(MemoryRegionPurpose::Bios)
                        .file_backed(test_file_backed_mapping(0x2000, 0x2000)),
                ),
            ],
        );
    }
5636
#[cfg(target_arch = "aarch64")]
#[test]
fn normalized_cpu_ipc_ratios_simple() {
    // Per-CPU frequency tables. Only the largest entry of each table is handed
    // to `normalize_cpu_ipc_ratios` below.
    let freq_tables = BTreeMap::from([
        (0, vec![100000, 200000, 500000]),
        (1, vec![50000, 75000, 200000]),
    ]);
    // Raw IPC ratio per CPU. A CPU missing from this map would fall back to
    // `DEFAULT_CPU_CAPACITY` in the lookup closure.
    let ipc_by_cpu = BTreeMap::from([(0, 1024), (1, 512)]);
    let host_max_freq = 5000000;

    let max_freq_per_cpu = freq_tables
        .iter()
        .map(|(cpu, table)| (*cpu, table.iter().copied().max().unwrap_or_default()));

    let normalized = normalize_cpu_ipc_ratios(max_freq_per_cpu, host_max_freq, |cpu| {
        ipc_by_cpu
            .get(&cpu)
            .copied()
            .unwrap_or(DEFAULT_CPU_CAPACITY)
    })
    .expect("normalize_cpu_ipc_ratios failed");

    // NOTE(review): 102 and 20 equal 1024 * 500000 / 5000000 and
    // 512 * 200000 / 5000000 rounded down; presumably that is the formula being
    // exercised -- confirm against `normalize_cpu_ipc_ratios`.
    assert_eq!(
        normalized.into_iter().collect::<Vec<(usize, u32)>>(),
        vec![(0, 102), (1, 20)]
    );
}
5669
#[test]
fn test_get_representative_pcpu() {
    use std::collections::BTreeMap;

    // Per-vCPU affinity: vCPU 0 is pinned to pCPUs {4, 5}, vCPU 1 to {6}.
    let per_vcpu = Some(VcpuAffinity::PerVcpu(BTreeMap::from([
        (0, arch::CpuSet::new(vec![4, 5])),
        (1, arch::CpuSet::new(vec![6])),
    ])));
    assert_eq!(get_representative_pcpu(0, &per_vcpu), 4);
    assert_eq!(get_representative_pcpu(1, &per_vcpu), 6);
    // vCPU 2 has no entry in the map; the expected answer is its own id.
    assert_eq!(get_representative_pcpu(2, &per_vcpu), 2);

    // Global affinity: every vCPU is expected to report the same pCPU (7).
    let global = Some(VcpuAffinity::Global(arch::CpuSet::new(vec![7, 8])));
    assert_eq!(get_representative_pcpu(0, &global), 7);
    assert_eq!(get_representative_pcpu(1, &global), 7);

    // No affinity configured: each vCPU is expected to report its own id.
    assert_eq!(get_representative_pcpu(0, &None), 0);
    assert_eq!(get_representative_pcpu(1, &None), 1);
}
5689
#[test]
fn test_map_vcpu_capacity() {
    // vCPU 0 is pinned to pCPU 0 and vCPU 1 to pCPU 2.
    let pinning = Some(VcpuAffinity::PerVcpu(BTreeMap::from([
        (0, CpuSet::new(vec![0])),
        (1, CpuSet::new(vec![2])),
    ])));
    // Host capacities keyed by pCPU id; pCPU 1 is deliberately absent.
    let host_capacity = BTreeMap::from([(0, 512), (2, 1024)]);
    let vcpu_count = 2;

    let per_vcpu_capacity = map_vcpu_capacity(vcpu_count, &pinning, &host_capacity).unwrap();

    // Each vCPU should carry the capacity of the pCPU it is pinned to.
    assert_eq!(per_vcpu_capacity.get(&0).copied(), Some(512));
    assert_eq!(per_vcpu_capacity.get(&1).copied(), Some(1024));
}
5712
#[test]
fn test_map_vcpu_clusters() {
    use std::collections::BTreeMap;

    // Two host clusters: pCPUs 0-3 and pCPUs 4-7.
    let host_clusters = vec![
        arch::CpuSet::new(vec![0, 1, 2, 3]),
        arch::CpuSet::new(vec![4, 5, 6, 7]),
    ];

    // vCPU 0 -> pCPU 0 (first cluster), vCPU 1 -> pCPU 4 (second cluster),
    // vCPU 2 -> pCPU 1 (first cluster).
    let mut affinity_map = BTreeMap::new();
    affinity_map.insert(0, arch::CpuSet::new(vec![0]));
    affinity_map.insert(1, arch::CpuSet::new(vec![4]));
    affinity_map.insert(2, arch::CpuSet::new(vec![1]));
    let vcpu_affinity = Some(VcpuAffinity::PerVcpu(affinity_map));

    // `host_clusters` is not needed afterwards, so it is moved into the call
    // rather than cloned.
    let vcpu_clusters = map_vcpu_clusters(3, &vcpu_affinity, host_clusters).unwrap();

    assert_eq!(vcpu_clusters.len(), 2);

    // First cluster holds vCPUs 0 and 2 only.
    assert!(vcpu_clusters[0].contains(&0));
    assert!(vcpu_clusters[0].contains(&2));
    assert!(!vcpu_clusters[0].contains(&1));

    // Second cluster holds vCPU 1 only.
    assert!(vcpu_clusters[1].contains(&1));
    assert!(!vcpu_clusters[1].contains(&0));
    assert!(!vcpu_clusters[1].contains(&2));
}
5739}