pub mod android;
pub mod fdt;
pub mod pstore;
pub mod serial;
pub mod sys;
use std::collections::BTreeMap;
use std::error::Error as StdError;
use std::fs::File;
use std::io;
use std::ops::Deref;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::mpsc;
use std::sync::mpsc::SendError;
use std::sync::Arc;
use acpi_tables::sdt::SDT;
use base::syslog;
use base::AsRawDescriptor;
use base::AsRawDescriptors;
use base::FileGetLen;
use base::FileReadWriteAtVolatile;
use base::RecvTube;
use base::SendTube;
use base::Tube;
use devices::virtio::VirtioDevice;
use devices::BarRange;
use devices::Bus;
use devices::BusDevice;
use devices::BusDeviceObj;
use devices::BusError;
use devices::BusResumeDevice;
use devices::FwCfgParameters;
use devices::GpeScope;
use devices::HotPlugBus;
use devices::IrqChip;
use devices::IrqEventSource;
use devices::PciAddress;
use devices::PciBus;
use devices::PciDevice;
use devices::PciDeviceError;
use devices::PciInterruptPin;
use devices::PciRoot;
use devices::PciRootCommand;
use devices::PreferredIrq;
#[cfg(any(target_os = "android", target_os = "linux"))]
use devices::ProxyDevice;
use devices::SerialHardware;
use devices::SerialParameters;
use devices::VirtioMmioDevice;
pub use fdt::apply_device_tree_overlays;
pub use fdt::DtbOverlay;
#[cfg(feature = "gdb")]
use gdbstub::arch::Arch;
use hypervisor::IoEventAddress;
use hypervisor::MemCacheType;
use hypervisor::Vm;
#[cfg(windows)]
use jail::FakeMinijailStub as Minijail;
#[cfg(any(target_os = "android", target_os = "linux"))]
use minijail::Minijail;
use remain::sorted;
#[cfg(target_arch = "x86_64")]
use resources::AddressRange;
use resources::SystemAllocator;
use resources::SystemAllocatorConfig;
use serde::de::Visitor;
use serde::Deserialize;
use serde::Serialize;
use serde_keyvalue::FromKeyValues;
pub use serial::add_serial_devices;
pub use serial::get_serial_cmdline;
pub use serial::set_default_serial_parameters;
pub use serial::GetSerialCmdlineError;
pub use serial::SERIAL_ADDR;
use sync::Condvar;
use sync::Mutex;
#[cfg(any(target_os = "android", target_os = "linux"))]
pub use sys::linux::PlatformBusResources;
use thiserror::Error;
use uuid::Uuid;
use vm_control::BatControl;
use vm_control::BatteryType;
use vm_control::PmResource;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use vm_memory::GuestMemoryError;
use vm_memory::MemoryRegionInformation;
use vm_memory::MemoryRegionOptions;
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] {
pub use devices::IrqChipAArch64 as IrqChipArch;
#[cfg(feature = "gdb")]
pub use gdbstub_arch::aarch64::AArch64 as GdbArch;
pub use hypervisor::CpuConfigAArch64 as CpuConfigArch;
pub use hypervisor::Hypervisor as HypervisorArch;
pub use hypervisor::VcpuAArch64 as VcpuArch;
pub use hypervisor::VcpuInitAArch64 as VcpuInitArch;
pub use hypervisor::VmAArch64 as VmArch;
} else if #[cfg(target_arch = "riscv64")] {
pub use devices::IrqChipRiscv64 as IrqChipArch;
#[cfg(feature = "gdb")]
pub use gdbstub_arch::riscv::Riscv64 as GdbArch;
pub use hypervisor::CpuConfigRiscv64 as CpuConfigArch;
pub use hypervisor::Hypervisor as HypervisorArch;
pub use hypervisor::VcpuInitRiscv64 as VcpuInitArch;
pub use hypervisor::VcpuRiscv64 as VcpuArch;
pub use hypervisor::VmRiscv64 as VmArch;
} else if #[cfg(target_arch = "x86_64")] {
pub use devices::IrqChipX86_64 as IrqChipArch;
#[cfg(feature = "gdb")]
pub use gdbstub_arch::x86::X86_64_SSE as GdbArch;
pub use hypervisor::CpuConfigX86_64 as CpuConfigArch;
pub use hypervisor::HypervisorX86_64 as HypervisorArch;
pub use hypervisor::VcpuInitX86_64 as VcpuInitArch;
pub use hypervisor::VcpuX86_64 as VcpuArch;
pub use hypervisor::VmX86_64 as VmArch;
}
}
/// The image a VM is started from; each variant owns the opened image `File`.
pub enum VmImage {
/// A kernel image file.
Kernel(File),
/// A BIOS image file.
Bios(File),
}
/// Pstore configuration.
///
/// Can be parsed from a key-value string (`FromKeyValues`) or any serde format. Keys use
/// kebab-case, unknown keys are rejected, and both fields are mandatory (neither has a default).
#[derive(Clone, Debug, Deserialize, Serialize, FromKeyValues, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct Pstore {
/// Path to the pstore backing file.
pub path: PathBuf,
/// Size of the pstore region. NOTE(review): presumably in bytes - confirm against the consumer.
pub size: u32,
}
/// Selects a position for the FDT; handed to `LinuxArch::build_vm` as `fdt_position`.
///
/// Serialized with kebab-case variant names (`start`, `end`, `after-payload`).
/// NOTE(review): the code that interprets these variants is not in this file; the exact
/// reference point of each position (presumably guest RAM / the loaded payload) should be
/// confirmed there.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, FromKeyValues)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub enum FdtPosition {
/// Place at the start.
Start,
/// Place at the end.
End,
/// Place after the payload.
AfterPayload,
}
/// An ordered list of CPU indices.
///
/// The indices are kept exactly as supplied: nothing here sorts or de-duplicates them.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct CpuSet(Vec<usize>);

impl CpuSet {
    /// Builds a `CpuSet` from any iterable of CPU indices, preserving iteration order.
    pub fn new<I: IntoIterator<Item = usize>>(cpus: I) -> Self {
        let indices: Vec<usize> = cpus.into_iter().collect();
        Self(indices)
    }

    /// Returns a borrowing iterator over the stored CPU indices.
    pub fn iter(&self) -> std::slice::Iter<'_, usize> {
        let Self(indices) = self;
        indices.iter()
    }
}
impl FromIterator<usize> for CpuSet {
fn from_iter<T>(iter: T) -> Self
where
T: IntoIterator<Item = usize>,
{
CpuSet::new(iter)
}
}
/// SVE configuration; only compiled for `arm` / `aarch64` targets.
///
/// Deserialized with kebab-case keys; unknown keys are rejected. Defaults to `enable: false`
/// through the derived `Default`.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct SveConfig {
/// Whether SVE is enabled. NOTE(review): presumably for the guest vCPUs - confirm with users
/// of `VmComponents::sve_config`.
pub enable: bool,
}
/// Parses one CPU specifier and appends the CPUs it names to `cpuset`.
///
/// `s` is either a single index (`"3"`) or an inclusive, ascending range (`"5-7"`). The string is
/// split at the first `-` only, so anything after it must itself be a plain index.
///
/// # Errors
///
/// Returns a human-readable message if an index is not a non-negative integer or if the range is
/// descending. On error `cpuset` is left untouched.
fn parse_cpu_range(s: &str, cpuset: &mut Vec<usize>) -> Result<(), String> {
    let parse_index = |text: &str| -> Result<usize, String> {
        match text.parse::<usize>() {
            Ok(index) => Ok(index),
            Err(_) => Err(format!(
                "invalid CPU index {} - index must be a non-negative integer",
                text
            )),
        }
    };

    let bounds = if let Some((low, high)) = s.split_once('-') {
        // Both ends are parsed (low first) before the ordering check.
        let low = parse_index(low)?;
        let high = parse_index(high)?;
        if high < low {
            return Err(format!(
                "invalid CPU range {} - ranges must be from low to high",
                s
            ));
        }
        low..=high
    } else {
        // No dash: a single CPU is a one-element range.
        let only = parse_index(s)?;
        only..=only
    };

    cpuset.extend(bounds);
    Ok(())
}
/// Parses a comma-separated list of CPU indices and inclusive ranges, e.g. `"0,4,7,9-12"`.
impl FromStr for CpuSet {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut cpus = Vec::new();
        // Stops at the first specifier that fails to parse and returns its error message.
        s.split(',')
            .try_for_each(|specifier| parse_cpu_range(specifier, &mut cpus))?;
        Ok(Self::new(cpus))
    }
}
impl Deref for CpuSet {
type Target = Vec<usize>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl IntoIterator for CpuSet {
type Item = usize;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
/// Deserializes a `CpuSet` from a sequence whose elements are either plain integers (a single
/// CPU) or strings holding a CPU specifier accepted by `parse_cpu_range` (e.g. `"9-12"`).
impl<'de> Deserialize<'de> for CpuSet {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
// Visitor that only accepts sequences.
struct CpuSetVisitor;
impl<'de> Visitor<'de> for CpuSetVisitor {
type Value = CpuSet;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("CpuSet")
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: serde::de::SeqAccess<'de>,
{
// One sequence element: untagged, so serde tries `Single` first and falls back to
// `Range` when the element is not an integer.
#[derive(Deserialize)]
#[serde(untagged)]
enum CpuSetValue<'a> {
Single(usize),
// Borrowed string. NOTE(review): this presumably requires the input format to be able
// to hand out borrowed `&str` data - confirm for formats that must unescape strings.
Range(&'a str),
}
let mut cpus = Vec::new();
while let Some(cpuset) = seq.next_element::<CpuSetValue>()? {
match cpuset {
CpuSetValue::Single(cpu) => cpus.push(cpu),
CpuSetValue::Range(range) => {
// Parse errors are surfaced as custom serde errors carrying the parser's message.
parse_cpu_range(range, &mut cpus).map_err(serde::de::Error::custom)?;
}
}
}
Ok(CpuSet::new(cpus))
}
}
deserializer.deserialize_seq(CpuSetVisitor)
}
}
/// Serializes a `CpuSet` as a sequence in which each run of consecutive, ascending CPU indices
/// is collapsed: a run of length one is emitted as an integer, and a longer run as the string
/// `"start-end"`. For example `[0, 4, 7, 9, 10, 11, 12, 15]` becomes `[0, 4, 7, "9-12", 15]`.
///
/// Indices are processed in stored order; only an index that is exactly one greater than the
/// previous one extends the current run.
impl Serialize for CpuSet {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeSeq;

        // The number of emitted elements is not known up front because runs get merged.
        let mut seq = serializer.serialize_seq(None)?;
        let mut serialize_range = |start: usize, end: usize| -> Result<(), S::Error> {
            if start == end {
                seq.serialize_element(&start)?;
            } else {
                seq.serialize_element(&format!("{}-{}", start, end))?;
            }
            Ok(())
        };

        // The run currently being accumulated, as inclusive `(start, end)` bounds.
        let mut range: Option<(usize, usize)> = None;
        for &core in &self.0 {
            range = match range {
                None => Some((core, core)),
                // `checked_add` instead of `core - 1`: the subtraction underflows (panicking in
                // debug builds, wrapping in release builds) when a `0` follows another index.
                Some((start, end)) if end.checked_add(1) == Some(core) => Some((start, core)),
                Some((start, end)) => {
                    serialize_range(start, end)?;
                    Some((core, core))
                }
            };
        }
        // Flush the final run, if any.
        if let Some((start, end)) = range {
            serialize_range(start, end)?;
        }
        seq.end()
    }
}
/// Mapping of guest vCPU threads to host CPUs.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum VcpuAffinity {
/// One `CpuSet` shared by all vCPUs.
Global(CpuSet),
/// A `CpuSet` per entry, keyed by a `usize`. NOTE(review): the key is presumably the vCPU
/// index - confirm against the code that applies the affinity.
PerVcpu(BTreeMap<usize, CpuSet>),
}
/// The collection of settings and resources consumed when building a VM; passed by value to
/// `LinuxArch::build_vm` and by reference to the memory-layout helpers of `LinuxArch`.
///
/// `#[sorted]` enforces that the fields stay in alphabetical order, so new fields must be
/// inserted at their sorted position. Several fields only exist for particular target
/// architectures / operating systems (see the `cfg` attributes).
#[sorted]
pub struct VmComponents {
#[cfg(all(target_arch = "x86_64", unix))]
pub ac_adapter: bool,
/// Additional ACPI system description tables.
pub acpi_sdts: Vec<SDT>,
pub android_fstab: Option<File>,
pub boot_cpu: usize,
pub bootorder_fw_cfg_blob: Vec<u8>,
#[cfg(target_arch = "x86_64")]
pub break_linux_pci_config_io: bool,
// NOTE(review): the `usize` keys of the per-CPU maps below are presumably CPU indices -
// confirm against the code that fills them in.
pub cpu_capacity: BTreeMap<usize, u32>,
pub cpu_clusters: Vec<CpuSet>,
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_os = "android", target_os = "linux")
))]
pub cpu_frequencies: BTreeMap<usize, Vec<u32>>,
pub delay_rt: bool,
pub dynamic_power_coefficient: BTreeMap<usize, u32>,
pub extra_kernel_params: Vec<String>,
#[cfg(target_arch = "x86_64")]
pub force_s2idle: bool,
pub fw_cfg_enable: bool,
pub fw_cfg_parameters: Vec<FwCfgParameters>,
pub host_cpu_topology: bool,
pub hugepages: bool,
/// Hypervisor-specific configuration.
pub hv_cfg: hypervisor::Config,
pub initrd_image: Option<File>,
pub itmt: bool,
pub memory_size: u64,
pub no_i8042: bool,
pub no_rtc: bool,
pub no_smt: bool,
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_os = "android", target_os = "linux")
))]
pub normalized_cpu_capacities: BTreeMap<usize, u32>,
#[cfg(target_arch = "x86_64")]
pub pci_low_start: Option<u64>,
#[cfg(target_arch = "x86_64")]
pub pcie_ecam: Option<AddressRange>,
pub pflash_block_size: u32,
pub pflash_image: Option<File>,
/// Optional pstore configuration (see `Pstore`).
pub pstore: Option<Pstore>,
pub pvm_fw: Option<File>,
pub rt_cpus: CpuSet,
#[cfg(target_arch = "x86_64")]
pub smbios: SmbiosOptions,
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
pub sve_config: SveConfig,
pub swiotlb: Option<u64>,
/// Optional vCPU-to-host-CPU affinity (see `VcpuAffinity`).
pub vcpu_affinity: Option<VcpuAffinity>,
pub vcpu_count: usize,
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_os = "android", target_os = "linux")
))]
pub vcpu_domain_paths: BTreeMap<usize, PathBuf>,
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_os = "android", target_os = "linux")
))]
pub vcpu_domains: BTreeMap<usize, u32>,
#[cfg(all(
any(target_arch = "arm", target_arch = "aarch64"),
any(target_os = "android", target_os = "linux")
))]
pub virt_cpufreq_v2: bool,
/// The kernel or BIOS image to start the VM from (see `VmImage`).
pub vm_image: VmImage,
}
/// The fully assembled VM state returned by `LinuxArch::build_vm`.
///
/// Generic over the architecture-specific VM type `V` and vCPU type `Vcpu`. `#[sorted]`
/// enforces alphabetical field order.
#[sorted]
pub struct RunnableLinuxVm<V: VmArch, Vcpu: VcpuArch> {
pub bat_control: Option<BatControl>,
pub delay_rt: bool,
pub devices_thread: Option<std::thread::JoinHandle<()>>,
/// Hot-plug capable buses keyed by a `u8`. NOTE(review): presumably the PCI bus number.
pub hotplug_bus: BTreeMap<u8, Arc<Mutex<dyn HotPlugBus>>>,
pub io_bus: Arc<Bus>,
pub irq_chip: Box<dyn IrqChipArch>,
/// Bus into which `configure_pci_device` inserts the BAR ranges of newly added devices.
pub mmio_bus: Arc<Bus>,
pub no_smt: bool,
/// Process id -> debug label of sandboxed (proxied) device processes; extended by
/// `configure_pci_device` when a device is started in a jail.
pub pid_debug_label_map: BTreeMap<u32, String>,
#[cfg(any(target_os = "android", target_os = "linux"))]
pub platform_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
pub pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
pub resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>,
pub root_config: Arc<Mutex<PciRoot>>,
pub rt_cpus: CpuSet,
pub suspend_tube: (Arc<Mutex<SendTube>>, RecvTube),
pub vcpu_affinity: Option<VcpuAffinity>,
pub vcpu_count: usize,
pub vcpu_init: Vec<VcpuInitArch>,
pub vcpus: Option<Vec<Vcpu>>,
pub vm: V,
pub vm_request_tubes: Vec<Tube>,
}
/// A virtio device paired with the optional jail it should be run in.
pub struct VirtioDeviceStub {
/// The virtio device itself.
pub dev: Box<dyn VirtioDevice>,
/// Jail for the device, or `None`. NOTE(review): `None` presumably means the device runs
/// unsandboxed - confirm with the consumer.
pub jail: Option<Minijail>,
}
/// Architecture-specific hooks for laying out, building and configuring a VM.
///
/// All items are associated functions (no `self`), so implementors act as a namespace for one
/// target architecture.
pub trait LinuxArch {
/// Error type returned by the fallible functions of this trait.
type Error: StdError;
/// Architecture-specific memory layout description, computed once by `arch_memory_layout`
/// and then passed by reference to the other functions.
type ArchMemoryLayout;
/// Computes the architecture-specific memory layout from the VM components.
fn arch_memory_layout(
components: &VmComponents,
) -> std::result::Result<Self::ArchMemoryLayout, Self::Error>;
/// Returns the guest memory regions to create, as `(start address, size, options)` tuples.
fn guest_memory_layout(
components: &VmComponents,
arch_memory_layout: &Self::ArchMemoryLayout,
hypervisor: &impl hypervisor::Hypervisor,
) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>;
/// Returns the configuration used to construct the `SystemAllocator` for `vm`.
fn get_system_allocator_config<V: Vm>(
vm: &V,
arch_memory_layout: &Self::ArchMemoryLayout,
) -> SystemAllocatorConfig;
/// Consumes `components`, `vm` and `devices` and assembles a `RunnableLinuxVm`.
///
/// Some parameters only exist on x86_64 (`pflash_jail`, `fw_cfg_jail`) or when the `swap`
/// feature is enabled (`swap_controller`).
fn build_vm<V, Vcpu>(
components: VmComponents,
arch_memory_layout: &Self::ArchMemoryLayout,
vm_evt_wrtube: &SendTube,
system_allocator: &mut SystemAllocator,
serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
serial_jail: Option<Minijail>,
battery: (Option<BatteryType>, Option<Minijail>),
vm: V,
ramoops_region: Option<pstore::RamoopsRegion>,
devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
irq_chip: &mut dyn IrqChipArch,
vcpu_ids: &mut Vec<usize>,
dump_device_tree_blob: Option<PathBuf>,
debugcon_jail: Option<Minijail>,
#[cfg(target_arch = "x86_64")] pflash_jail: Option<Minijail>,
#[cfg(target_arch = "x86_64")] fw_cfg_jail: Option<Minijail>,
#[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
device_tree_overlays: Vec<DtbOverlay>,
fdt_position: Option<FdtPosition>,
no_pmu: bool,
) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
where
V: VmArch,
Vcpu: VcpuArch;
/// Configures one vCPU (`vcpu_id` out of `num_cpus`) using its `vcpu_init` state and an
/// optional CPU configuration.
fn configure_vcpu<V: Vm>(
vm: &V,
hypervisor: &dyn HypervisorArch,
irq_chip: &mut dyn IrqChipArch,
vcpu: &mut dyn VcpuArch,
vcpu_init: VcpuInitArch,
vcpu_id: usize,
num_cpus: usize,
cpu_config: Option<CpuConfigArch>,
) -> Result<(), Self::Error>;
/// Registers `device` with an already built VM and returns the PCI address it was given.
///
/// The `minijail` parameter only exists on Android/Linux builds.
fn register_pci_device<V: VmArch, Vcpu: VcpuArch>(
linux: &mut RunnableLinuxVm<V, Vcpu>,
device: Box<dyn PciDevice>,
#[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>,
resources: &mut SystemAllocator,
hp_control_tube: &mpsc::Sender<PciRootCommand>,
#[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
) -> Result<PciAddress, Self::Error>;
// NOTE(review): the `usize` keys in the maps returned below are presumably host CPU indices.
/// Returns the available frequencies (kHz, per the function name) for each host CPU.
fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>, Self::Error>;
/// Returns the maximum frequency (kHz, per the function name) for each host CPU.
fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>, Self::Error>;
/// Returns a capacity value for each host CPU.
fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>, Self::Error>;
/// Returns the host CPU clusters, one `CpuSet` per cluster.
fn get_host_cpu_clusters() -> Result<Vec<CpuSet>, Self::Error>;
}
/// Architecture-specific vCPU operations for GDB debugging; only compiled with the `gdb`
/// feature.
///
/// Register types come from the `gdbstub` architecture description aliased as `GdbArch`.
#[cfg(feature = "gdb")]
pub trait GdbOps<T: VcpuArch> {
/// Error type returned by every operation.
type Error: StdError;
/// Reads the vCPU's register set.
fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers, Self::Error>;
/// Writes the vCPU's register set.
fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<(), Self::Error>;
/// Reads `len` bytes of guest memory starting at `vaddr`.
/// NOTE(review): `vaddr` is presumably a guest virtual address translated via `vcpu` - confirm
/// in the implementations.
fn read_memory(
vcpu: &T,
guest_mem: &GuestMemory,
vaddr: GuestAddress,
len: usize,
) -> Result<Vec<u8>, Self::Error>;
/// Writes `buf` to guest memory starting at `vaddr`.
fn write_memory(
vcpu: &T,
guest_mem: &GuestMemory,
vaddr: GuestAddress,
buf: &[u8],
) -> Result<(), Self::Error>;
/// Reads a single register and returns its raw bytes.
fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>, Self::Error>;
/// Writes a single register from raw bytes.
fn write_register(
vcpu: &T,
reg_id: <GdbArch as Arch>::RegId,
data: &[u8],
) -> Result<(), Self::Error>;
/// Enables single-step execution on the vCPU.
fn enable_singlestep(vcpu: &T) -> Result<(), Self::Error>;
/// Returns the maximum number of hardware breakpoints supported.
fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize, Self::Error>;
/// Sets the hardware breakpoints to the given guest addresses.
fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<(), Self::Error>;
}
/// Errors returned by the device registration helpers in this module
/// (`configure_pci_device`, `generate_virtio_mmio_bus`, `generate_pci_root`, ...).
///
/// The user-facing message of each variant is given by its `#[error]` attribute. `#[sorted]`
/// enforces alphabetical variant order, so new variants must be inserted at their sorted
/// position. Some variants only exist on Android/Linux builds.
#[sorted]
#[derive(Error, Debug)]
pub enum DeviceRegistrationError {
#[error("no more addresses are available")]
AddrsExhausted,
#[error("Allocating device addresses: {0}")]
AllocateDeviceAddrs(PciDeviceError),
#[error("Allocating IO addresses: {0}")]
AllocateIoAddrs(PciDeviceError),
#[error("Allocating IO resource: {0}")]
AllocateIoResource(resources::Error),
#[error("Allocating IRQ number")]
AllocateIrq,
#[cfg(any(target_os = "android", target_os = "linux"))]
#[error("Allocating IRQ resource: {0}")]
AllocateIrqResource(devices::vfio::VfioError),
#[error("pci topology is broken")]
BrokenPciTopology,
#[cfg(any(target_os = "android", target_os = "linux"))]
#[error("failed to clone jail: {0}")]
CloneJail(minijail::Error),
#[error("unable to add device to kernel command line: {0}")]
Cmdline(kernel_cmdline::Error),
#[error("failed to configure window size: {0}")]
ConfigureWindowSize(PciDeviceError),
#[error("failed to create pipe: {0}")]
CreatePipe(base::Error),
#[error("failed to create pci root: {0}")]
CreateRoot(anyhow::Error),
#[error("failed to create serial device: {0}")]
CreateSerialDevice(devices::SerialError),
#[error("failed to create tube: {0}")]
CreateTube(base::TubeError),
#[error("failed to clone event: {0}")]
EventClone(base::Error),
#[error("failed to create event: {0}")]
EventCreate(base::Error),
#[error("failed to generate ACPI content")]
GenerateAcpi,
#[error("no more IRQs are available")]
IrqsExhausted,
#[error("cannot match VFIO device to DT node due to a missing symbol")]
MissingDeviceTreeSymbol,
#[error("missing required serial device {0}")]
MissingRequiredSerialDevice(u8),
#[error("failed to add to mmio bus: {0}")]
MmioInsert(BusError),
#[error("failed to insert device into PCI root: {0}")]
PciRootAddDevice(PciDeviceError),
#[cfg(any(target_os = "android", target_os = "linux"))]
#[error("failed to create proxy device: {0}")]
ProxyDeviceCreation(devices::ProxyError),
#[cfg(any(target_os = "android", target_os = "linux"))]
#[error("failed to register battery device to VM: {0}")]
RegisterBattery(devices::BatteryError),
#[error("failed to register PCI device to pci root bus")]
RegisterDevice(SendError<PciRootCommand>),
#[error("could not register PCI device capabilities: {0}")]
RegisterDeviceCapabilities(PciDeviceError),
#[error("failed to register ioevent to VM: {0}")]
RegisterIoevent(base::Error),
#[error("failed to register irq event to VM: {0}")]
RegisterIrqfd(base::Error),
#[error("Setting up VFIO platform IRQ: {0}")]
SetupVfioPlatformIrq(anyhow::Error),
}
/// Configures a single PCI device and attaches it to an already built VM.
///
/// Steps, in order: allocate the device's PCI address and BARs, announce a new bridge bus if
/// the device provides one, wire up a fixed INTx interrupt if the device requests one,
/// register the device's capabilities, optionally wrap the device in a sandboxed `ProxyDevice`,
/// announce the device to the PCI root via `hp_control_tube`, and insert its BAR ranges into
/// the VM's MMIO bus.
///
/// # Arguments
///
/// * `linux` - the VM to attach to; its `irq_chip`, `mmio_bus` and `pid_debug_label_map` are
///   used.
/// * `device` - the device to configure.
/// * `jail` - (Android/Linux only) if `Some`, the device is run through a `ProxyDevice` using
///   this jail.
/// * `resources` - allocator for addresses and IRQs.
/// * `hp_control_tube` - channel on which `PciRootCommand`s are sent.
/// * `swap_controller` - (`swap` feature only) forwarded to `ProxyDevice::new`.
///
/// # Returns
///
/// The PCI address allocated for the device.
///
/// # Errors
///
/// Returns a `DeviceRegistrationError` describing the first step that failed.
pub fn configure_pci_device<V: VmArch, Vcpu: VcpuArch>(
linux: &mut RunnableLinuxVm<V, Vcpu>,
mut device: Box<dyn PciDevice>,
#[cfg(any(target_os = "android", target_os = "linux"))] jail: Option<Minijail>,
resources: &mut SystemAllocator,
hp_control_tube: &mpsc::Sender<PciRootCommand>,
#[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
) -> Result<PciAddress, DeviceRegistrationError> {
// Allocate the PCI address first, then the IO BARs, then the device BARs.
let pci_address = device
.allocate_address(resources)
.map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
let mmio_ranges = device
.allocate_io_bars(resources)
.map_err(DeviceRegistrationError::AllocateIoAddrs)?;
let device_ranges = device
.allocate_device_bars(resources)
.map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
// A device that exposes a new PCI bus is a bridge: announce the bus and configure its window.
// No child BAR ranges are known at this point, so an empty list is passed.
if let Some(pci_bus) = device.get_new_pci_bus() {
hp_control_tube
.send(PciRootCommand::AddBridge(pci_bus))
.map_err(DeviceRegistrationError::RegisterDevice)?;
let bar_ranges = Vec::new();
device
.configure_bridge_window(resources, &bar_ranges)
.map_err(DeviceRegistrationError::ConfigureWindowSize)?;
}
// The event is created unconditionally, but an INTx line is only assigned and registered when
// the device asks for a fixed pin/GSI; no IRQ is allocated for any other preference.
let intx_event = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
if let PreferredIrq::Fixed { pin, gsi } = device.preferred_irq() {
resources.reserve_irq(gsi);
device.assign_irq(
intx_event
.try_clone()
.map_err(DeviceRegistrationError::EventClone)?,
pin,
gsi,
);
linux
.irq_chip
.as_irq_chip_mut()
.register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(&device))
.map_err(DeviceRegistrationError::RegisterIrqfd)?;
}
// Descriptors handed to `ProxyDevice::new` below: the device's own plus those of the logging,
// tracing and metrics subsystems.
let mut keep_rds = device.keep_rds();
syslog::push_descriptors(&mut keep_rds);
cros_tracing::push_descriptors!(&mut keep_rds);
metrics::push_descriptors(&mut keep_rds);
device
.register_device_capabilities()
.map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
// Android/Linux: with a jail the device is wrapped in a `ProxyDevice` and the proxy's pid is
// recorded with its debug label; without a jail the device is used directly.
#[cfg(any(target_os = "android", target_os = "linux"))]
let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
let proxy = ProxyDevice::new(
device,
jail,
keep_rds,
#[cfg(feature = "swap")]
swap_controller,
)
.map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
linux
.pid_debug_label_map
.insert(proxy.pid() as u32, proxy.debug_label());
Arc::new(Mutex::new(proxy))
} else {
device.on_sandboxed();
Arc::new(Mutex::new(device))
};
// Windows: the device is always used directly.
#[cfg(windows)]
let arced_dev = {
device.on_sandboxed();
Arc::new(Mutex::new(device))
};
// Note: the `Add` command is only sent on Android/Linux builds.
#[cfg(any(target_os = "android", target_os = "linux"))]
hp_control_tube
.send(PciRootCommand::Add(pci_address, arced_dev.clone()))
.map_err(DeviceRegistrationError::RegisterDevice)?;
// Make every allocated BAR range reachable through the MMIO bus.
for range in &mmio_ranges {
linux
.mmio_bus
.insert(arced_dev.clone(), range.addr, range.size)
.map_err(DeviceRegistrationError::MmioInsert)?;
}
for range in &device_ranges {
linux
.mmio_bus
.insert(arced_dev.clone(), range.addr, range.size)
.map_err(DeviceRegistrationError::MmioInsert)?;
}
Ok(pci_address)
}
/// Registers a list of virtio-MMIO devices on `mmio_bus`.
///
/// For every device: allocates its MMIO regions, allocates an IRQ and registers an
/// edge-triggered IRQ event with `irq_chip`, registers the device's ioevents with `vm`,
/// (x86_64 only) lets the device extend the ACPI tables, optionally wraps the device in a
/// sandboxed `ProxyDevice`, and inserts its regions into `mmio_bus`.
///
/// # Arguments
///
/// * `devices` - the devices, each with an optional jail (the jail is ignored on Windows).
/// * `irq_chip` - receives the IRQ event registrations.
/// * `mmio_bus` - bus the devices are inserted into.
/// * `resources` - allocator for MMIO regions and IRQ numbers.
/// * `vm` - receives the ioevent registrations.
/// * `sdts` - ACPI tables; threaded through each device's `generate_acpi` on x86_64 and
///   returned unchanged on other architectures.
/// * `swap_controller` - (`swap` feature only) forwarded to `ProxyDevice::new`.
///
/// # Returns
///
/// A map from proxy process id to debug label for every sandboxed device, and the (possibly
/// extended) ACPI tables.
///
/// # Errors
///
/// Returns a `DeviceRegistrationError` for the first failing step; devices processed before the
/// failure stay registered.
pub fn generate_virtio_mmio_bus(
devices: Vec<(VirtioMmioDevice, Option<Minijail>)>,
irq_chip: &mut dyn IrqChip,
mmio_bus: &Bus,
resources: &mut SystemAllocator,
vm: &mut impl Vm,
sdts: Vec<SDT>,
#[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
) -> Result<(BTreeMap<u32, String>, Vec<SDT>), DeviceRegistrationError> {
// Only mutated on Android/Linux (sandboxed devices), hence the Windows lint allowance.
#[cfg_attr(windows, allow(unused_mut))]
let mut pid_labels = BTreeMap::new();
// `sdts` is only rebound as mutable where it is actually modified.
#[cfg(target_arch = "x86_64")]
let mut sdts = sdts;
for dev_value in devices.into_iter() {
#[cfg(any(target_os = "android", target_os = "linux"))]
let (mut device, jail) = dev_value;
#[cfg(windows)]
let (mut device, _) = dev_value;
let ranges = device
.allocate_regions(resources)
.map_err(DeviceRegistrationError::AllocateIoResource)?;
// Descriptors handed to `ProxyDevice::new` below; extended as events are created.
let mut keep_rds = device.keep_rds();
syslog::push_descriptors(&mut keep_rds);
cros_tracing::push_descriptors!(&mut keep_rds);
metrics::push_descriptors(&mut keep_rds);
let irq_num = resources
.allocate_irq()
.ok_or(DeviceRegistrationError::AllocateIrq)?;
let irq_evt = devices::IrqEdgeEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
irq_chip
.register_edge_irq_event(irq_num, &irq_evt, IrqEventSource::from_device(&device))
.map_err(DeviceRegistrationError::RegisterIrqfd)?;
device.assign_irq(&irq_evt, irq_num);
keep_rds.extend(irq_evt.as_raw_descriptors());
for (event, addr, datamatch) in device.ioevents() {
let io_addr = IoEventAddress::Mmio(addr);
vm.register_ioevent(event, io_addr, datamatch)
.map_err(DeviceRegistrationError::RegisterIoevent)?;
keep_rds.push(event.as_raw_descriptor());
}
#[cfg(target_arch = "x86_64")]
{
// `generate_acpi` takes the tables by value and returns the updated set, or `None` on
// failure.
sdts = device
.generate_acpi(sdts)
.ok_or(DeviceRegistrationError::GenerateAcpi)?;
}
#[cfg(any(target_os = "android", target_os = "linux"))]
let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
let proxy = ProxyDevice::new(
device,
jail,
keep_rds,
#[cfg(feature = "swap")]
swap_controller,
)
.map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
Arc::new(Mutex::new(proxy))
} else {
device.on_sandboxed();
Arc::new(Mutex::new(device))
};
#[cfg(windows)]
let arced_dev = {
device.on_sandboxed();
Arc::new(Mutex::new(device))
};
// Each allocated region is an (address, size) pair.
for range in &ranges {
mmio_bus
.insert(arced_dev.clone(), range.0, range.1)
.map_err(DeviceRegistrationError::MmioInsert)?;
}
}
Ok((pid_labels, sdts))
}
/// Recursively allocates BARs for every device on `parent_bus` and on all buses below it.
///
/// `device_addrs[i]` is the PCI address of `devices[i]`. For the bus number of `parent_bus` the
/// function runs two passes over the devices located on that bus:
///
/// 1. Devices that expose a new PCI bus (bridges): recurse into the child bus first, attach the
///    child bus to `parent_bus`, configure the bridge window from the child's BAR ranges, then
///    allocate the bridge's own BARs and set its subordinate bus number.
/// 2. All remaining devices: allocate their IO and device BARs.
///
/// The allocated ranges are recorded per device index in `io_ranges` / `device_ranges`.
///
/// # Returns
///
/// All BAR ranges (including bridge windows) allocated on and below this bus, and the highest
/// subordinate bus number found (the bus's own number if it has no child buses).
///
/// # Errors
///
/// `BrokenPciTopology` if a child bus cannot be added to its parent; otherwise the error of the
/// failing window configuration or BAR allocation.
fn generate_pci_topology(
parent_bus: Arc<Mutex<PciBus>>,
resources: &mut SystemAllocator,
io_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
device_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
device_addrs: &[PciAddress],
devices: &mut Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
) -> Result<(Vec<BarRange>, u8), DeviceRegistrationError> {
let mut bar_ranges = Vec::new();
let bus_num = parent_bus.lock().get_bus_num();
let mut subordinate_bus = bus_num;
// Pass 1: bridges on this bus.
for (dev_idx, addr) in device_addrs.iter().enumerate() {
if addr.bus == bus_num {
if let Some(child_bus) = devices[dev_idx].0.get_new_pci_bus() {
// Handle everything behind the bridge before the bridge itself, so the bridge window can
// be sized from the children's BAR ranges.
let (child_bar_ranges, child_sub_bus) = generate_pci_topology(
child_bus.clone(),
resources,
io_ranges,
device_ranges,
device_addrs,
devices,
)?;
// Re-borrow the device only after the recursive call, which needs `devices` mutably.
let device = &mut devices[dev_idx].0;
parent_bus
.lock()
.add_child_bus(child_bus.clone())
.map_err(|_| DeviceRegistrationError::BrokenPciTopology)?;
let bridge_window = device
.configure_bridge_window(resources, &child_bar_ranges)
.map_err(DeviceRegistrationError::ConfigureWindowSize)?;
bar_ranges.extend(bridge_window);
let ranges = device
.allocate_io_bars(resources)
.map_err(DeviceRegistrationError::AllocateIoAddrs)?;
io_ranges.insert(dev_idx, ranges.clone());
bar_ranges.extend(ranges);
let ranges = device
.allocate_device_bars(resources)
.map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
device_ranges.insert(dev_idx, ranges.clone());
bar_ranges.extend(ranges);
device.set_subordinate_bus(child_sub_bus);
subordinate_bus = std::cmp::max(subordinate_bus, child_sub_bus);
}
}
}
// Pass 2: non-bridge devices on this bus.
for (dev_idx, addr) in device_addrs.iter().enumerate() {
if addr.bus == bus_num {
let device = &mut devices[dev_idx].0;
if device.get_new_pci_bus().is_none() {
let ranges = device
.allocate_io_bars(resources)
.map_err(DeviceRegistrationError::AllocateIoAddrs)?;
io_ranges.insert(dev_idx, ranges.clone());
bar_ranges.extend(ranges);
let ranges = device
.allocate_device_bars(resources)
.map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
device_ranges.insert(dev_idx, ranges.clone());
bar_ranges.extend(ranges);
}
}
}
Ok((bar_ranges, subordinate_bus))
}
/// Allocates a PCI address for every PCI device in `devices`.
///
/// Runs two passes so that devices reporting a preferred address are served before devices
/// without one. Entries that are not PCI devices are skipped.
///
/// # Errors
///
/// Returns `DeviceRegistrationError::AllocateDeviceAddrs` for the first device whose address
/// allocation fails.
pub fn assign_pci_addresses(
    devices: &mut [(Box<dyn BusDeviceObj>, Option<Minijail>)],
    resources: &mut SystemAllocator,
) -> Result<(), DeviceRegistrationError> {
    // First pass: devices with a preferred address; second pass: all the others.
    for has_preference in [true, false] {
        for (device, _jail) in devices.iter_mut() {
            if let Some(pci_device) = device.as_pci_device_mut() {
                if pci_device.preferred_address().is_some() == has_preference {
                    // Only success matters here; the allocated address itself is discarded.
                    pci_device
                        .allocate_address(resources)
                        .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
                }
            }
        }
    }
    Ok(())
}
/// Creates the PCI root and registers all `devices` on it.
///
/// Phases, in order:
///
/// 1. Allocate a PCI address for every device.
/// 2. Build the bus topology and allocate all BARs (`generate_pci_topology`).
/// 3. Create the `PciRoot`.
/// 4. Assign INTx interrupts: fixed ones are reserved as requested; for `PreferredIrq::Any`
///    the pin is derived from the function number and the IRQ is shared per
///    (bus, device, pin), with at most `max_irqs` distinct IRQs allocated before reuse.
/// 5. For each device (sandboxed devices first): register capabilities, optionally generate
///    ACPI methods, set up a GPE, wrap the device in a `ProxyDevice` if it has a jail, add it
///    to the root and insert its BAR ranges into `mmio_bus`.
///
/// # Arguments
///
/// * `devices` - the PCI devices, each with an optional jail (ignored on Windows).
/// * `irq_chip` - receives the level IRQ event registrations.
/// * `mmio_bus` / `io_bus` - buses handed (as weak references) to the `PciRoot`; device BARs are
///   inserted into `mmio_bus`.
/// * `mmio_base`, `mmio_register_bit_num` - forwarded to `PciRoot::new`.
/// * `resources` - allocator for addresses, BARs, IRQs and GPEs.
/// * `vm` - used for root creation, device insertion and memory region registration.
/// * `max_irqs` - maximum number of distinct IRQs allocated for `PreferredIrq::Any` devices.
/// * `vcfg_base` - when `Some`, ACPI methods are generated for each device and any returned
///   shared memory is mapped at `vcfg_base + offset`.
/// * `swap_controller` - (`swap` feature only) forwarded to `ProxyDevice::new`.
///
/// # Returns
///
/// `(root, pci_irqs, pid_labels, amls, gpe_scope_amls)`: the PCI root, the
/// `(address, gsi, pin)` of every assigned interrupt, the pid -> debug label map of sandboxed
/// devices, the ACPI method bytes per device address, and the GPE scope AML bytes per device
/// address.
///
/// # Errors
///
/// Returns a `DeviceRegistrationError` for the first failing step.
pub fn generate_pci_root(
mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
irq_chip: &mut dyn IrqChip,
mmio_bus: Arc<Bus>,
mmio_base: GuestAddress,
mmio_register_bit_num: usize,
io_bus: Arc<Bus>,
resources: &mut SystemAllocator,
vm: &mut impl Vm,
max_irqs: usize,
vcfg_base: Option<u64>,
#[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
) -> Result<
(
PciRoot,
Vec<(PciAddress, u32, PciInterruptPin)>,
BTreeMap<u32, String>,
BTreeMap<PciAddress, Vec<u8>>,
BTreeMap<PciAddress, Vec<u8>>,
),
DeviceRegistrationError,
> {
// Phase 1: `device_addrs[i]` is the address of `devices[i]`.
let mut device_addrs = Vec::new();
for (device, _jail) in devices.iter_mut() {
let address = device
.allocate_address(resources)
.map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
device_addrs.push(address);
}
// Phase 2: BAR ranges per device index, filled in by `generate_pci_topology`.
let mut device_ranges = BTreeMap::new();
let mut io_ranges = BTreeMap::new();
let root_bus = Arc::new(Mutex::new(PciBus::new(0, 0, false)));
generate_pci_topology(
root_bus.clone(),
resources,
&mut io_ranges,
&mut device_ranges,
&device_addrs,
&mut devices,
)?;
// Phase 3: the root only holds weak references to the buses.
let mut root = PciRoot::new(
vm,
Arc::downgrade(&mmio_bus),
mmio_base,
mmio_register_bit_num,
Arc::downgrade(&io_bus),
root_bus,
)
.map_err(DeviceRegistrationError::CreateRoot)?;
#[cfg_attr(windows, allow(unused_mut))]
let mut pid_labels = BTreeMap::new();
// Phase 4: interrupt assignment.
let mut pci_irqs = Vec::new();
// IRQ numbers allocated so far for `PreferredIrq::Any` devices.
let mut irqs: Vec<u32> = Vec::new();
// (bus, device, pin) -> IRQ number, so functions of one device sharing a pin share the IRQ.
let mut dev_pin_irq = BTreeMap::new();
for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
let pci_address = device_addrs[dev_idx];
let irq = match device.preferred_irq() {
PreferredIrq::Fixed { pin, gsi } => {
resources.reserve_irq(gsi);
Some((pin, gsi))
}
PreferredIrq::Any => {
// The pin is chosen from the function number, cycling through INTA..INTD.
let pin = match pci_address.func % 4 {
0 => PciInterruptPin::IntA,
1 => PciInterruptPin::IntB,
2 => PciInterruptPin::IntC,
_ => PciInterruptPin::IntD,
};
let pin_key = (pci_address.bus, pci_address.dev, pin);
let irq_num = if let Some(irq_num) = dev_pin_irq.get(&pin_key) {
*irq_num
} else {
// Allocate fresh IRQs until `max_irqs` exist, then reuse them based on the device index.
// NOTE(review): with `max_irqs == 0` the modulo below would panic - confirm callers never
// pass zero.
let irq_num = if irqs.len() < max_irqs {
let irq_num = resources
.allocate_irq()
.ok_or(DeviceRegistrationError::AllocateIrq)?;
irqs.push(irq_num);
irq_num
} else {
irqs[dev_idx % max_irqs]
};
dev_pin_irq.insert(pin_key, irq_num);
irq_num
};
Some((pin, irq_num))
}
PreferredIrq::None => {
None
}
};
if let Some((pin, gsi)) = irq {
let intx_event =
devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;
device.assign_irq(
intx_event
.try_clone()
.map_err(DeviceRegistrationError::EventClone)?,
pin,
gsi,
);
irq_chip
.register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(device))
.map_err(DeviceRegistrationError::RegisterIrqfd)?;
pci_irqs.push((pci_address, gsi, pin));
}
}
// Phase 5: reorder so devices with a jail come first; the original index is kept alongside
// each device so `device_addrs` and the range maps can still be looked up.
let devices = {
let (sandboxed, non_sandboxed): (Vec<_>, Vec<_>) = devices
.into_iter()
.enumerate()
.partition(|(_, (_, jail))| jail.is_some());
sandboxed.into_iter().chain(non_sandboxed)
};
let mut amls = BTreeMap::new();
let mut gpe_scope_amls = BTreeMap::new();
for (dev_idx, dev_value) in devices {
#[cfg(any(target_os = "android", target_os = "linux"))]
let (mut device, jail) = dev_value;
#[cfg(windows)]
let (mut device, _) = dev_value;
let address = device_addrs[dev_idx];
// Descriptors handed to `ProxyDevice::new` below, including those of guest memory.
let mut keep_rds = device.keep_rds();
syslog::push_descriptors(&mut keep_rds);
cros_tracing::push_descriptors!(&mut keep_rds);
metrics::push_descriptors(&mut keep_rds);
keep_rds.append(&mut vm.get_memory().as_raw_descriptors());
// Devices without recorded ranges simply get empty lists.
let ranges = io_ranges.remove(&dev_idx).unwrap_or_default();
let device_ranges = device_ranges.remove(&dev_idx).unwrap_or_default();
device
.register_device_capabilities()
.map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;
if let Some(vcfg_base) = vcfg_base {
let (methods, shm) = device.generate_acpi_methods();
if !methods.is_empty() {
amls.insert(address, methods);
}
if let Some((offset, mmap)) = shm {
// The result is deliberately discarded, so a failure to map this region is not reported.
let _ = vm.add_memory_region(
GuestAddress(vcfg_base + offset as u64),
Box::new(mmap),
false,
false,
MemCacheType::CacheCoherent,
);
}
}
let gpe_nr = device.set_gpe(resources);
#[cfg(any(target_os = "android", target_os = "linux"))]
let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
let proxy = ProxyDevice::new(
device,
jail,
keep_rds,
#[cfg(feature = "swap")]
swap_controller,
)
.map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
Arc::new(Mutex::new(proxy))
} else {
device.on_sandboxed();
Arc::new(Mutex::new(device))
};
#[cfg(windows)]
let arced_dev = {
device.on_sandboxed();
Arc::new(Mutex::new(device))
};
root.add_device(address, arced_dev.clone(), vm)
.map_err(DeviceRegistrationError::PciRootAddDevice)?;
for range in &ranges {
mmio_bus
.insert(arced_dev.clone(), range.addr, range.size)
.map_err(DeviceRegistrationError::MmioInsert)?;
}
for range in &device_ranges {
mmio_bus
.insert(arced_dev.clone(), range.addr, range.size)
.map_err(DeviceRegistrationError::MmioInsert)?;
}
// If the device got a GPE and the root knows its ACPI path, emit the GPE scope AML for it.
if let Some(gpe_nr) = gpe_nr {
if let Some(acpi_path) = root.acpi_path(&address) {
let mut gpe_aml = Vec::new();
GpeScope {}.cast_to_aml_bytes(
&mut gpe_aml,
gpe_nr,
format!("\\{}", acpi_path).as_str(),
);
if !gpe_aml.is_empty() {
gpe_scope_amls.insert(address, gpe_aml);
}
}
}
}
Ok((root, pci_irqs, pid_labels, amls, gpe_scope_amls))
}
/// Errors returned by `load_image` and `load_image_high`.
///
/// `#[sorted]` enforces alphabetical variant order.
#[sorted]
#[derive(Error, Debug)]
pub enum LoadImageError {
/// The requested alignment (payload) is not a power of two.
#[error("Alignment not a power of two: {0}")]
BadAlignment(u64),
/// Querying the image length failed.
#[error("Getting image size failed: {0}")]
GetLen(io::Error),
/// The destination range could not be obtained from guest memory.
#[error("GuestMemory get slice failed: {0}")]
GuestMemorySlice(GuestMemoryError),
/// The image (payload: its size) exceeds the allowed maximum or does not fit in `usize`.
#[error("Image size too large: {0}")]
ImageSizeTooLarge(u64),
/// No memory region can hold the image within the requested address bounds.
#[error("No suitable memory region found")]
NoSuitableMemoryRegion,
/// Reading the image contents into guest memory failed.
#[error("Reading image into memory failed: {0}")]
ReadToMemory(io::Error),
/// The image is empty; only returned by `load_image_high`.
#[error("Cannot load zero-sized image")]
ZeroSizedImage,
}
/// Copies the entire contents of `image` into guest memory at `guest_addr`.
///
/// # Arguments
///
/// * `guest_mem` - guest memory to write into.
/// * `image` - the image to load; it is read from offset 0.
/// * `guest_addr` - guest address at which the image is placed.
/// * `max_size` - largest image size, in bytes, that is accepted.
///
/// # Returns
///
/// The number of bytes loaded (the image length).
///
/// # Errors
///
/// * `GetLen` if the image length cannot be determined.
/// * `ImageSizeTooLarge` if the image is larger than `max_size` or does not fit in `usize`.
/// * `GuestMemorySlice` if the destination range is not valid guest memory.
/// * `ReadToMemory` if reading the image fails.
pub fn load_image<F>(
    guest_mem: &GuestMemory,
    image: &mut F,
    guest_addr: GuestAddress,
    max_size: u64,
) -> Result<usize, LoadImageError>
where
    F: FileReadWriteAtVolatile + FileGetLen,
{
    let image_len = image.get_len().map_err(LoadImageError::GetLen)?;

    let fits_in_usize = image_len <= usize::MAX as u64;
    if !fits_in_usize || image_len > max_size {
        return Err(LoadImageError::ImageSizeTooLarge(image_len));
    }
    // Lossless: checked against `usize::MAX` above.
    let byte_count = image_len as usize;

    let destination = guest_mem
        .get_slice_at_addr(guest_addr, byte_count)
        .map_err(LoadImageError::GuestMemorySlice)?;

    image
        .read_exact_at_volatile(destination, 0)
        .map(|_| byte_count)
        .map_err(LoadImageError::ReadToMemory)
}
/// Loads `image` at the highest suitably aligned guest address at which it fits.
///
/// Memory regions are examined from the highest start address downwards. Within a region the
/// image is placed as high as possible without extending past the end of the region or past
/// `max_guest_addr`, and the start address is then rounded down to `align`. The first candidate
/// that still lies inside the region and is not below `min_guest_addr` is used.
///
/// # Arguments
///
/// * `guest_mem` - guest memory to write into.
/// * `image` - the image to load; it is read from offset 0.
/// * `min_guest_addr` - lowest acceptable load address.
/// * `max_guest_addr` - highest guest address (inclusive) the image may occupy.
/// * `region_filter` - optional predicate selecting which memory regions may be used; all
///   regions are considered when `None`.
/// * `align` - required alignment of the load address; must be a power of two.
///
/// # Returns
///
/// The chosen load address and the number of bytes loaded.
///
/// # Errors
///
/// * `BadAlignment` if `align` is not a power of two.
/// * `GetLen` if the image length cannot be determined.
/// * `ZeroSizedImage` if the image is empty.
/// * `ImageSizeTooLarge` if the image exceeds the aligned size of the
///   `min_guest_addr..max_guest_addr` window or does not fit in `usize`.
/// * `NoSuitableMemoryRegion` if no region can hold the image within the bounds.
/// * `GuestMemorySlice` / `ReadToMemory` if copying the image fails.
pub fn load_image_high<F>(
    guest_mem: &GuestMemory,
    image: &mut F,
    min_guest_addr: GuestAddress,
    max_guest_addr: GuestAddress,
    region_filter: Option<fn(&MemoryRegionInformation) -> bool>,
    align: u64,
) -> Result<(GuestAddress, usize), LoadImageError>
where
    F: FileReadWriteAtVolatile + FileGetLen,
{
    if !align.is_power_of_two() {
        return Err(LoadImageError::BadAlignment(align));
    }
    // Clears the low bits, i.e. rounds down to a multiple of `align`.
    let align_mask = !(align - 1);

    let max_size = max_guest_addr.offset_from(min_guest_addr) & align_mask;
    let image_len = image.get_len().map_err(LoadImageError::GetLen)?;

    if image_len == 0 {
        return Err(LoadImageError::ZeroSizedImage);
    }
    if image_len > usize::MAX as u64 || image_len > max_size {
        return Err(LoadImageError::ImageSizeTooLarge(image_len));
    }

    // Gather the permitted regions and order them by start address.
    let mut candidates: Vec<_> = guest_mem
        .regions()
        .filter(region_filter.unwrap_or(|_| true))
        .collect();
    candidates.sort_unstable_by(|lhs, rhs| lhs.guest_addr.cmp(&rhs.guest_addr));

    // Walk the regions from the top of the address space downwards.
    let load_addr = candidates
        .into_iter()
        .rev()
        .filter_map(|region| {
            // Last usable byte of the region, capped by the caller's upper bound.
            let region_last = region
                .guest_addr
                .checked_add((region.size as u64).checked_sub(1)?)?;
            let ceiling = region_last.min(max_guest_addr);
            // Lowest aligned address inside the region.
            let lowest_aligned = region.guest_addr.align(align)?;
            // Highest aligned start such that the image ends at or before `ceiling`.
            let candidate = ceiling.checked_sub(image_len - 1)? & align_mask;
            if candidate < lowest_aligned {
                None
            } else {
                Some(candidate)
            }
        })
        .find(|candidate| *candidate >= min_guest_addr)
        .ok_or(LoadImageError::NoSuitableMemoryRegion)?;

    // Lossless: checked against `usize::MAX` above.
    let byte_count = image_len as usize;
    let destination = guest_mem
        .get_slice_at_addr(load_addr, byte_count)
        .map_err(LoadImageError::GuestMemorySlice)?;
    image
        .read_exact_at_volatile(destination, 0)
        .map_err(LoadImageError::ReadToMemory)?;

    Ok((load_addr, byte_count))
}
/// SMBIOS table overrides.
///
/// Parseable from key-value options with kebab-case keys (e.g. `bios-vendor`); unknown keys
/// are rejected. Every `Option` field may be omitted, and `oem_strings` defaults to empty.
#[derive(Clone, Debug, Default, Serialize, Deserialize, FromKeyValues, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct SmbiosOptions {
/// BIOS vendor name.
pub bios_vendor: Option<String>,
/// BIOS version.
pub bios_version: Option<String>,
/// System manufacturer.
pub manufacturer: Option<String>,
/// System product name.
pub product_name: Option<String>,
/// System serial number.
pub serial_number: Option<String>,
/// System UUID.
pub uuid: Option<Uuid>,
/// Additional OEM strings; empty when not specified.
#[serde(default)]
pub oem_strings: Vec<String>,
}
// Unit tests for option parsing, `CpuSet` (de)serialization and `load_image_high`.
#[cfg(test)]
mod tests {
use serde_keyvalue::from_key_values;
use tempfile::tempfile;
use super::*;
// `Pstore` parses from key-value options and requires both `path` and `size`.
#[test]
fn parse_pstore() {
let res: Pstore = from_key_values("path=/some/path,size=16384").unwrap();
assert_eq!(
res,
Pstore {
path: "/some/path".into(),
size: 16384,
}
);
// Missing `size`.
let res = from_key_values::<Pstore>("path=/some/path");
assert!(res.is_err());
// Missing `path`.
let res = from_key_values::<Pstore>("size=16384");
assert!(res.is_err());
// Empty input.
let res = from_key_values::<Pstore>("");
assert!(res.is_err());
}
// `CpuSet` accepts single indices, ranges, and a mix of both in key-value syntax.
#[test]
fn deserialize_cpuset_serde_kv() {
let res: CpuSet = from_key_values("[0,4,7]").unwrap();
assert_eq!(res, CpuSet::new(vec![0, 4, 7]));
let res: CpuSet = from_key_values("[9-12]").unwrap();
assert_eq!(res, CpuSet::new(vec![9, 10, 11, 12]));
let res: CpuSet = from_key_values("[0,4,7,9-12,15]").unwrap();
assert_eq!(res, CpuSet::new(vec![0, 4, 7, 9, 10, 11, 12, 15]));
}
// JSON round trip: consecutive indices are serialized as a "start-end" string.
#[test]
fn deserialize_serialize_cpuset_json() {
let json_str = "[0,4,7]";
let cpuset = CpuSet::new(vec![0, 4, 7]);
let res: CpuSet = serde_json::from_str(json_str).unwrap();
assert_eq!(res, cpuset);
assert_eq!(serde_json::to_string(&cpuset).unwrap(), json_str);
let json_str = r#"["9-12"]"#;
let cpuset = CpuSet::new(vec![9, 10, 11, 12]);
let res: CpuSet = serde_json::from_str(json_str).unwrap();
assert_eq!(res, cpuset);
assert_eq!(serde_json::to_string(&cpuset).unwrap(), json_str);
let json_str = r#"[0,4,7,"9-12",15]"#;
let cpuset = CpuSet::new(vec![0, 4, 7, 9, 10, 11, 12, 15]);
let res: CpuSet = serde_json::from_str(json_str).unwrap();
assert_eq!(res, cpuset);
assert_eq!(serde_json::to_string(&cpuset).unwrap(), json_str);
}
// With an upper bound just below 4 GiB, the image must land at the top of the highest region.
#[test]
fn load_image_high_max_4g() {
// Two 1 GiB regions: 0..1 GiB and 2 GiB..3 GiB.
let mem = GuestMemory::new(&[
(GuestAddress(0x0000_0000), 0x4000_0000), (GuestAddress(0x8000_0000), 0x4000_0000), ])
.unwrap();
const TEST_IMAGE_SIZE: u64 = 1234;
let mut test_image = tempfile().unwrap();
test_image.set_len(TEST_IMAGE_SIZE).unwrap();
const TEST_ALIGN: u64 = 0x8000;
// Arguments: min address 0x8000, max address 4 GiB - 1, no region filter.
let (addr, size) = load_image_high(
&mem,
&mut test_image,
GuestAddress(0x8000),
GuestAddress(0xFFFF_FFFF), None,
TEST_ALIGN,
)
.unwrap();
// Highest 0x8000-aligned start at which 1234 bytes end at or before 0xBFFF_FFFF.
assert_eq!(addr, GuestAddress(0xBFFF_8000));
assert_eq!(addr.offset() % TEST_ALIGN, 0);
assert_eq!(size, TEST_IMAGE_SIZE as usize);
}
}