use std::sync::Arc;
use anyhow::anyhow;
use anyhow::Context;
use base::error;
#[cfg(not(test))]
use base::Clock;
use base::Error;
use base::Event;
#[cfg(test)]
use base::FakeClock as Clock;
use base::Result;
use base::Tube;
use hypervisor::kvm::KvmVcpu;
use hypervisor::kvm::KvmVm;
use hypervisor::HypervisorCap;
use hypervisor::IoapicState;
use hypervisor::IrqRoute;
use hypervisor::IrqSource;
use hypervisor::IrqSourceChip;
use hypervisor::LapicState;
use hypervisor::MPState;
use hypervisor::PicSelect;
use hypervisor::PicState;
use hypervisor::PitState;
use hypervisor::Vcpu;
use hypervisor::VcpuX86_64;
use hypervisor::Vm;
use hypervisor::VmX86_64;
use kvm_sys::*;
use resources::SystemAllocator;
use serde::Deserialize;
use serde::Serialize;
use sync::Mutex;
use crate::irqchip::Ioapic;
use crate::irqchip::IrqEvent;
use crate::irqchip::IrqEventIndex;
use crate::irqchip::Pic;
use crate::irqchip::VcpuRunState;
use crate::irqchip::IOAPIC_BASE_ADDRESS;
use crate::irqchip::IOAPIC_MEM_LENGTH_BYTES;
use crate::Bus;
use crate::IrqChip;
use crate::IrqChipCap;
use crate::IrqChipX86_64;
use crate::IrqEdgeEvent;
use crate::IrqEventSource;
use crate::IrqLevelEvent;
use crate::Pit;
use crate::PitError;
/// IRQ number on which `KvmSplitIrqChip::new` registers the userspace PIT's edge event.
const PIT_CHANNEL0_IRQ: u32 = 0;
/// Builds the default IRQ routing table.
///
/// GSIs 0-7 are routed to both the primary PIC and the IOAPIC, GSIs 8-15 to both the secondary
/// PIC and the IOAPIC, and GSIs from 16 up to (but excluding) `ioapic_pins` to the IOAPIC only.
/// The sixteen legacy GSIs are always emitted, even when `ioapic_pins` is smaller than 16.
fn kvm_default_irq_routing_table(ioapic_pins: usize) -> Vec<IrqRoute> {
    // Each legacy GSI yields a PIC route immediately followed by its IOAPIC route.
    let legacy_routes = (0..16u32).flat_map(|gsi| {
        let pic = if gsi < 8 {
            IrqSourceChip::PicPrimary
        } else {
            IrqSourceChip::PicSecondary
        };
        [
            IrqRoute::pic_irq_route(pic, gsi),
            IrqRoute::ioapic_irq_route(gsi),
        ]
    });
    let ioapic_only_routes = (16..ioapic_pins as u32).map(IrqRoute::ioapic_irq_route);
    legacy_routes.chain(ioapic_only_routes).collect()
}
/// IrqChip backed by the irqchip and PIT that `new` asks the KVM VM to create; PIC, IOAPIC and
/// PIT state accessors are forwarded to the VM.
pub struct KvmKernelIrqChip {
/// VM handle that state requests are forwarded to.
pub(super) vm: KvmVm,
/// One slot per vcpu, all `None` right after `new`; the `Arc` is shared between clones.
pub(super) vcpus: Arc<Mutex<Vec<Option<KvmVcpu>>>>,
/// Current IRQ routing table; the `Arc` is shared between clones.
pub(super) routes: Arc<Mutex<Vec<IrqRoute>>>,
}
/// Serialized form of the chip-specific state of a `KvmKernelIrqChip`.
#[derive(Serialize, Deserialize)]
struct KvmKernelIrqChipSnapshot {
/// Copy of the routing table at snapshot time.
routes: Vec<IrqRoute>,
/// APIC base value of each populated vcpu slot, in slot order.
apic_base: Vec<u64>,
/// Interrupt bitmap of each populated vcpu slot, in slot order.
interrupt_bitmap: Vec<[u64; 4usize]>,
}
impl KvmKernelIrqChip {
pub fn new(vm: KvmVm, num_vcpus: usize) -> Result<KvmKernelIrqChip> {
vm.create_irq_chip()?;
vm.create_pit()?;
let ioapic_pins = vm.get_ioapic_num_pins()?;
Ok(KvmKernelIrqChip {
vm,
vcpus: Arc::new(Mutex::new((0..num_vcpus).map(|_| None).collect())),
routes: Arc::new(Mutex::new(kvm_default_irq_routing_table(ioapic_pins))),
})
}
pub(super) fn arch_try_clone(&self) -> Result<Self> {
Ok(KvmKernelIrqChip {
vm: self.vm.try_clone()?,
vcpus: self.vcpus.clone(),
routes: self.routes.clone(),
})
}
}
impl IrqChipX86_64 for KvmKernelIrqChip {
fn try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>> {
Ok(Box::new(self.try_clone()?))
}
fn as_irq_chip(&self) -> &dyn IrqChip {
self
}
fn as_irq_chip_mut(&mut self) -> &mut dyn IrqChip {
self
}
fn get_pic_state(&self, select: PicSelect) -> Result<PicState> {
Ok(PicState::from(&self.vm.get_pic_state(select)?))
}
fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()> {
self.vm.set_pic_state(select, &kvm_pic_state::from(state))
}
fn get_ioapic_state(&self) -> Result<IoapicState> {
Ok(IoapicState::from(&self.vm.get_ioapic_state()?))
}
fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()> {
self.vm.set_ioapic_state(&kvm_ioapic_state::from(state))
}
fn get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
match self.vcpus.lock().get(vcpu_id) {
Some(Some(vcpu)) => Ok(LapicState::from(&vcpu.get_lapic()?)),
_ => Err(Error::new(libc::ENOENT)),
}
}
fn set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
match self.vcpus.lock().get(vcpu_id) {
Some(Some(vcpu)) => vcpu.set_lapic(&kvm_lapic_state::from(state)),
_ => Err(Error::new(libc::ENOENT)),
}
}
fn lapic_frequency(&self) -> u32 {
1_000_000_000
}
fn get_pit(&self) -> Result<PitState> {
Ok(PitState::from(&self.vm.get_pit_state()?))
}
fn set_pit(&mut self, state: &PitState) -> Result<()> {
self.vm.set_pit_state(&kvm_pit_state2::from(state))
}
fn pit_uses_speaker_port(&self) -> bool {
false
}
fn snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value> {
let mut apics: Vec<u64> = Vec::new();
let mut interrupt_bitmaps: Vec<[u64; 4usize]> = Vec::new();
{
let vcpus_lock = self.vcpus.lock();
for vcpu in (*vcpus_lock).iter().flatten() {
apics.push(vcpu.get_apic_base()?);
interrupt_bitmaps.push(vcpu.get_interrupt_bitmap()?);
}
}
serde_json::to_value(KvmKernelIrqChipSnapshot {
routes: self.routes.lock().clone(),
apic_base: apics,
interrupt_bitmap: interrupt_bitmaps,
})
.context("failed to serialize KvmKernelIrqChip")
}
fn restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
let deser: KvmKernelIrqChipSnapshot =
serde_json::from_value(data).context("failed to deserialize data")?;
self.set_irq_routes(&deser.routes)?;
let vcpus_lock = self.vcpus.lock();
assert_eq!(deser.interrupt_bitmap.len(), vcpus_lock.len());
assert_eq!(deser.apic_base.len(), vcpus_lock.len());
for (i, vcpu) in vcpus_lock.iter().enumerate() {
if let Some(vcpu) = vcpu {
vcpu.set_apic_base(*deser.apic_base.get(i).unwrap())?;
vcpu.set_interrupt_bitmap(*deser.interrupt_bitmap.get(i).unwrap())?;
} else {
return Err(anyhow!(
"Received None instead of Vcpu while restoring apic_base and interrupt_bitmap"
));
}
}
Ok(())
}
}
/// IrqChip that enables KVM's split irqchip mode on the VM and keeps its own `Pit`, `Pic` and
/// `Ioapic` device objects.
pub struct KvmSplitIrqChip {
/// VM handle used for GSI routing and irqfd registration.
vm: KvmVm,
/// One slot per vcpu, filled in by `add_vcpu`; the `Arc` is shared between clones.
vcpus: Arc<Mutex<Vec<Option<KvmVcpu>>>>,
/// Full routing table (irqchip and MSI routes); only the MSI entries are passed to the VM.
routes: Arc<Mutex<Vec<IrqRoute>>>,
/// PIT device, inserted on the I/O bus by `finalize_devices`.
pit: Arc<Mutex<Pit>>,
/// PIC device, inserted on the I/O bus by `finalize_devices`.
pic: Arc<Mutex<Pic>>,
/// IOAPIC device, inserted on the MMIO bus by `finalize_devices`.
ioapic: Arc<Mutex<Ioapic>>,
/// Number of IOAPIC pins; IRQs below this number are tracked in `irq_events`, others are
/// registered with the VM as irqfds.
ioapic_pins: usize,
/// Indices into `irq_events` whose IOAPIC servicing was deferred because the IOAPIC lock
/// could not be taken.
delayed_ioapic_irq_events: Arc<Mutex<Vec<usize>>>,
/// Signalled whenever an entry is added to `delayed_ioapic_irq_events`.
delayed_ioapic_irq_trigger: Event,
/// Registered IRQ events for IRQs below `ioapic_pins`; a slot becomes `None` when its event is
/// unregistered, so indices of other entries stay stable.
irq_events: Arc<Mutex<Vec<Option<IrqEvent>>>>,
}
/// Returns one placeholder MSI route (address 0, data 0) for every GSI in `0..ioapic_pins`.
fn kvm_dummy_msi_routes(ioapic_pins: usize) -> Vec<IrqRoute> {
    (0..ioapic_pins)
        .map(|pin| IrqRoute {
            gsi: pin as u32,
            source: IrqSource::Msi {
                address: 0,
                data: 0,
            },
        })
        .collect()
}
impl KvmSplitIrqChip {
    /// Enables split-irqchip mode on `vm` and builds the PIT, PIC and IOAPIC device objects.
    ///
    /// * `vm` - VM to enable the split irqchip on.
    /// * `num_vcpus` - number of (initially empty) vcpu slots to allocate.
    /// * `irq_tube` - tube handed to the `Ioapic`.
    /// * `ioapic_pins` - number of IOAPIC pins; when `None`, the VM is asked for its pin count.
    ///
    /// The initial routing table is the default table followed by one placeholder MSI route per
    /// pin, and the PIT's event is registered as an edge IRQ on `PIT_CHANNEL0_IRQ`.
    pub fn new(
        vm: KvmVm,
        num_vcpus: usize,
        irq_tube: Tube,
        ioapic_pins: Option<usize>,
    ) -> Result<Self> {
        // Only query the VM when the caller did not choose a pin count, so an explicit value
        // never triggers (or fails on) the query.
        let ioapic_pins = match ioapic_pins {
            Some(pins) => pins,
            None => vm.get_ioapic_num_pins()?,
        };
        vm.enable_split_irqchip(ioapic_pins)?;

        let pit_evt = IrqEdgeEvent::new()?;
        let pit = Pit::new(pit_evt.try_clone()?, Arc::new(Mutex::new(Clock::new()))).map_err(
            // Flatten PitError into the base error type; the match stays exhaustive so a new
            // variant forces a decision here.
            |e| match e {
                PitError::CloneEvent(err)
                | PitError::CreateEvent(err)
                | PitError::CreateWaitContext(err)
                | PitError::WaitError(err)
                | PitError::TimerCreateError(err) => err,
                PitError::SpawnThread(_) => Error::new(libc::EIO),
            },
        )?;
        let pit_event_source = IrqEventSource::from_device(&pit);

        let mut chip = KvmSplitIrqChip {
            vm,
            vcpus: Arc::new(Mutex::new((0..num_vcpus).map(|_| None).collect())),
            routes: Arc::new(Mutex::new(Vec::new())),
            pit: Arc::new(Mutex::new(pit)),
            pic: Arc::new(Mutex::new(Pic::new())),
            ioapic: Arc::new(Mutex::new(Ioapic::new(irq_tube, ioapic_pins)?)),
            ioapic_pins,
            delayed_ioapic_irq_events: Arc::new(Mutex::new(Vec::new())),
            delayed_ioapic_irq_trigger: Event::new()?,
            irq_events: Arc::new(Mutex::new(Default::default())),
        };

        let mut routes = kvm_default_irq_routing_table(ioapic_pins);
        routes.extend(kvm_dummy_msi_routes(ioapic_pins));
        chip.set_irq_routes(&routes)?;

        chip.register_edge_irq_event(PIT_CHANNEL0_IRQ, &pit_evt, pit_event_source)?;
        Ok(chip)
    }
}
impl KvmSplitIrqChip {
    /// Returns every `(chip, pin)` pair that `irq` is routed to through an irqchip route.
    ///
    /// MSI routes are ignored. Routes that name a GIC or AIA chip are logged as errors and
    /// skipped.
    fn routes_to_chips(&self, irq: u32) -> Vec<(IrqSourceChip, u32)> {
        let routes = self.routes.lock();
        let mut targets = Vec::new();
        for route in routes.iter().filter(|route| route.gsi == irq) {
            if let IrqSource::Irqchip { chip, pin } = &route.source {
                match chip {
                    IrqSourceChip::PicPrimary
                    | IrqSourceChip::PicSecondary
                    | IrqSourceChip::Ioapic => targets.push((*chip, *pin)),
                    IrqSourceChip::Gic => {
                        error!("gic irq should not be possible on a KvmSplitIrqChip")
                    }
                    IrqSourceChip::Aia => {
                        error!("Aia irq should not be possible on x86_64")
                    }
                }
            }
        }
        targets
    }

    /// Returns whether the PIC has an interrupt pending for `vcpu_id`.
    ///
    /// Only vcpu 0 is ever reported as having a PIC interrupt.
    pub fn interrupt_requested(&self, vcpu_id: usize) -> bool {
        vcpu_id == 0 && self.pic.lock().interrupt_requested()
    }

    /// Asks the PIC for the external interrupt vector to deliver to `vcpu_id`.
    ///
    /// Always `None` for any vcpu other than vcpu 0.
    pub fn get_external_interrupt(&self, vcpu_id: usize) -> Option<u8> {
        if vcpu_id == 0 {
            self.pic.lock().get_external_interrupt()
        } else {
            None
        }
    }

    /// Registers `irq_event` (and the optional `resample_event`) for `irq`.
    ///
    /// IRQs below `ioapic_pins` are recorded in `irq_events` and the index of the new entry is
    /// returned. Any other IRQ is registered with the VM as an irqfd and `None` is returned.
    fn register_irq_event(
        &mut self,
        irq: u32,
        irq_event: &Event,
        resample_event: Option<&Event>,
        source: IrqEventSource,
    ) -> Result<Option<IrqEventIndex>> {
        if irq >= self.ioapic_pins as u32 {
            self.vm.register_irqfd(irq, irq_event, resample_event)?;
            return Ok(None);
        }

        let event = irq_event.try_clone()?;
        let resample_event = resample_event.map(|evt| evt.try_clone()).transpose()?;
        let entry = IrqEvent {
            gsi: irq,
            event,
            resample_event,
            source,
        };

        let mut irq_events = self.irq_events.lock();
        irq_events.push(Some(entry));
        Ok(Some(irq_events.len() - 1))
    }

    /// Unregisters `irq_event` from `irq`.
    ///
    /// For IRQs below `ioapic_pins` the first matching entry in `irq_events` is cleared to
    /// `None` (nothing happens when there is no match). Any other IRQ is unregistered from the
    /// VM's irqfds.
    fn unregister_irq_event(&mut self, irq: u32, irq_event: &Event) -> Result<()> {
        if irq >= self.ioapic_pins as u32 {
            return self.vm.unregister_irqfd(irq, irq_event);
        }

        let mut irq_events = self.irq_events.lock();
        let matching = irq_events.iter().position(|slot| match slot {
            Some(evt) => evt.gsi == irq && irq_event.eq(&evt.event),
            None => false,
        });
        if let Some(index) = matching {
            irq_events[index] = None;
        }
        Ok(())
    }
}
/// Returns whether `route` and `other` cannot coexist in one routing table.
///
/// Two routes conflict when they share a GSI and are either both MSI routes or both irqchip
/// routes targeting the same chip. Every other combination does not conflict.
fn routes_conflict(route: &IrqRoute, other: &IrqRoute) -> bool {
    if route.gsi != other.gsi {
        return false;
    }
    match (route.source, other.source) {
        (IrqSource::Msi { .. }, IrqSource::Msi { .. }) => true,
        (
            IrqSource::Irqchip {
                chip: route_chip, ..
            },
            IrqSource::Irqchip {
                chip: other_chip, ..
            },
        ) => route_chip == other_chip,
        _ => false,
    }
}
impl IrqChip for KvmSplitIrqChip {
/// Stores a clone of `vcpu` in slot `vcpu_id`.
///
/// Panics when `vcpu` is not a `KvmVcpu` or when `vcpu_id` is out of range.
fn add_vcpu(&mut self, vcpu_id: usize, vcpu: &dyn Vcpu) -> Result<()> {
    let kvm_vcpu: &KvmVcpu = vcpu
        .downcast_ref()
        .expect("KvmSplitIrqChip::add_vcpu called with non-KvmVcpu");
    // Clone before taking the lock, matching the original evaluation order.
    let cloned = kvm_vcpu.try_clone()?;
    let mut vcpus = self.vcpus.lock();
    vcpus[vcpu_id] = Some(cloned);
    Ok(())
}
/// Registers the trigger of an edge-triggered IRQ event for `irq`, without a resample event.
fn register_edge_irq_event(
    &mut self,
    irq: u32,
    irq_event: &IrqEdgeEvent,
    source: IrqEventSource,
) -> Result<Option<IrqEventIndex>> {
    let trigger = irq_event.get_trigger();
    self.register_irq_event(irq, trigger, None, source)
}
/// Unregisters the trigger of an edge-triggered IRQ event from `irq`.
fn unregister_edge_irq_event(&mut self, irq: u32, irq_event: &IrqEdgeEvent) -> Result<()> {
    let trigger = irq_event.get_trigger();
    self.unregister_irq_event(irq, trigger)
}
/// Registers the trigger and resample events of a level-triggered IRQ event for `irq`.
fn register_level_irq_event(
    &mut self,
    irq: u32,
    irq_event: &IrqLevelEvent,
    source: IrqEventSource,
) -> Result<Option<IrqEventIndex>> {
    let trigger = irq_event.get_trigger();
    let resample = irq_event.get_resample();
    self.register_irq_event(irq, trigger, Some(resample), source)
}
/// Unregisters the trigger of a level-triggered IRQ event from `irq`.
fn unregister_level_irq_event(&mut self, irq: u32, irq_event: &IrqLevelEvent) -> Result<()> {
    let trigger = irq_event.get_trigger();
    self.unregister_irq_event(irq, trigger)
}
/// Adds `route` to the routing table, replacing any existing route it conflicts with.
///
/// Only the MSI entries of the updated table are passed on to the VM. The routes lock is held
/// until the VM call returns.
fn route_irq(&mut self, route: IrqRoute) -> Result<()> {
    let mut table = self.routes.lock();
    table.retain(|existing| !routes_conflict(existing, &route));
    table.push(route);

    let msi_only: Vec<IrqRoute> = table
        .iter()
        .filter(|entry| matches!(entry.source, IrqSource::Msi { .. }))
        .cloned()
        .collect();
    self.vm.set_gsi_routing(&msi_only)
}
/// Replaces the whole routing table with `routes`.
///
/// Only the MSI entries of `routes` are passed on to the VM. The routes lock is held until the
/// VM call returns.
fn set_irq_routes(&mut self, routes: &[IrqRoute]) -> Result<()> {
    let mut table = self.routes.lock();
    *table = routes.to_vec();

    let msi_only: Vec<IrqRoute> = routes
        .iter()
        .filter(|entry| matches!(entry.source, IrqSource::Msi { .. }))
        .cloned()
        .collect();
    self.vm.set_gsi_routing(&msi_only)
}
/// Returns `(index, source, event clone)` for every registered IRQ event that is still present.
///
/// Slots cleared by unregistration are skipped, so the returned indices may have gaps.
fn irq_event_tokens(&self) -> Result<Vec<(IrqEventIndex, IrqEventSource, Event)>> {
    let irq_events = self.irq_events.lock();
    let live_events = irq_events
        .iter()
        .enumerate()
        .filter_map(|(index, slot)| slot.as_ref().map(|evt| (index, evt)));

    let mut tokens = Vec::new();
    for (index, evt) in live_events {
        let source = evt.source.clone();
        let event = evt.event.try_clone()?;
        tokens.push((index, source, event));
    }
    Ok(tokens)
}
/// Drives `irq` to `level` on every PIC or IOAPIC pin it is routed to.
fn service_irq(&mut self, irq: u32, level: bool) -> Result<()> {
    for (chip, pin) in self.routes_to_chips(irq) {
        if matches!(
            chip,
            IrqSourceChip::PicPrimary | IrqSourceChip::PicSecondary
        ) {
            let mut pic = self.pic.lock();
            pic.service_irq(pin as u8, level);
        } else if matches!(chip, IrqSourceChip::Ioapic) {
            let mut ioapic = self.ioapic.lock();
            ioapic.service_irq(pin as usize, level);
        }
        // Any other chip kind is ignored.
    }
    Ok(())
}
/// Handles the registered IRQ event at `event_index`.
///
/// Does nothing when the slot has been cleared. Otherwise waits on the event and asserts the
/// interrupt on every PIC/IOAPIC pin the event's GSI is routed to. Events without a resample
/// event are de-asserted again immediately after being asserted.
///
/// Panics when `event_index` is out of range for `irq_events`, or when signalling the delayed
/// trigger fails.
fn service_irq_event(&mut self, event_index: IrqEventIndex) -> Result<()> {
// The `irq_events` lock guard created here stays alive for the whole `if let` body.
if let Some(evt) = &self.irq_events.lock()[event_index] {
// Presumably consumes the pending signal on the event (confirm against `Event::wait`).
evt.event.wait()?;
let chips = self.routes_to_chips(evt.gsi);
for (chip, pin) in chips {
match chip {
IrqSourceChip::PicPrimary | IrqSourceChip::PicSecondary => {
let mut pic = self.pic.lock();
pic.service_irq(pin as u8, true);
if evt.resample_event.is_none() {
pic.service_irq(pin as u8, false);
}
}
IrqSourceChip::Ioapic => {
// The IOAPIC lock is only tried, never blocked on; when it is busy the event is queued
// for `process_delayed_irq_events` and the delayed trigger is signalled.
if let Ok(mut ioapic) = self.ioapic.try_lock() {
ioapic.service_irq(pin as usize, true);
if evt.resample_event.is_none() {
ioapic.service_irq(pin as usize, false);
}
} else {
self.delayed_ioapic_irq_events.lock().push(event_index);
self.delayed_ioapic_irq_trigger.signal().unwrap();
}
}
_ => {}
}
}
}
Ok(())
}
/// Forwards an end-of-interrupt for `vector` to the IOAPIC.
fn broadcast_eoi(&self, vector: u8) -> Result<()> {
    let mut ioapic = self.ioapic.lock();
    ioapic.end_of_interrupt(vector);
    Ok(())
}
/// Injects a pending PIC interrupt into `vcpu`, if there is one and the vcpu can take it.
///
/// Returns without doing anything when no PIC interrupt is requested for this vcpu or the vcpu
/// is not ready for an interrupt. After injecting, if the PIC still has an interrupt requested,
/// an interrupt window is requested on the vcpu.
///
/// Panics when `vcpu` is not a `KvmVcpu`.
fn inject_interrupts(&self, vcpu: &dyn Vcpu) -> Result<()> {
    // The panic message names this method so a failed downcast is attributed correctly.
    let vcpu: &KvmVcpu = vcpu
        .downcast_ref()
        .expect("KvmSplitIrqChip::inject_interrupts called with non-KvmVcpu");

    let vcpu_id = vcpu.id();
    if !self.interrupt_requested(vcpu_id) || !vcpu.ready_for_interrupt() {
        return Ok(());
    }

    if let Some(vector) = self.get_external_interrupt(vcpu_id) {
        vcpu.interrupt(vector)?;
    }

    // Another interrupt is already pending: request an interrupt window on the vcpu.
    if self.interrupt_requested(vcpu_id) {
        vcpu.set_interrupt_window_requested(true);
    }
    Ok(())
}
/// No-op: this chip takes no action when told that a vcpu halted.
fn halted(&self, _vcpu_id: usize) {}
/// Always reports the vcpu as runnable; the `_vcpu` argument is ignored.
fn wait_until_runnable(&self, _vcpu: &dyn Vcpu) -> Result<VcpuRunState> {
Ok(VcpuRunState::Runnable)
}
/// No-op for this chip.
fn kick_halted_vcpus(&self) {}
fn get_mp_state(&self, vcpu_id: usize) -> Result<MPState> {
match self.vcpus.lock().get(vcpu_id) {
Some(Some(vcpu)) => Ok(MPState::from(&vcpu.get_mp_state()?)),
_ => Err(Error::new(libc::ENOENT)),
}
}
fn set_mp_state(&mut self, vcpu_id: usize, state: &MPState) -> Result<()> {
match self.vcpus.lock().get(vcpu_id) {
Some(Some(vcpu)) => vcpu.set_mp_state(&kvm_mp_state::from(state)),
_ => Err(Error::new(libc::ENOENT)),
}
}
/// Creates another handle to this chip.
///
/// The VM handle is cloned; the vcpu slots, routes, PIT, PIC, IOAPIC, delayed-event list and
/// registered IRQ events are shared with the original through their `Arc`s.
fn try_clone(&self) -> Result<Self> {
Ok(KvmSplitIrqChip {
vm: self.vm.try_clone()?,
vcpus: self.vcpus.clone(),
routes: self.routes.clone(),
pit: self.pit.clone(),
pic: self.pic.clone(),
ioapic: self.ioapic.clone(),
ioapic_pins: self.ioapic_pins,
delayed_ioapic_irq_events: self.delayed_ioapic_irq_events.clone(),
// NOTE(review): the clone gets a brand-new trigger `Event` rather than a clone of this
// one, while the delayed-event list itself is shared. A signal raised through one handle
// is presumably not observable through the other handle's token; confirm this is intended.
delayed_ioapic_irq_trigger: Event::new()?,
irq_events: self.irq_events.clone(),
})
}
/// Places the PIT, PIC and IOAPIC on their buses, hands them the resample events that have been
/// registered so far, and allocates IRQ numbers from `resources` until one at or beyond
/// `ioapic_pins` has been handed out.
///
/// Panics when a bus insertion or an IRQ allocation fails.
fn finalize_devices(
    &mut self,
    resources: &mut SystemAllocator,
    io_bus: &Bus,
    mmio_bus: &Bus,
) -> Result<()> {
    // PIT port ranges on the I/O bus.
    for (base, len) in [(0x040, 0x8), (0x061, 0x1)] {
        io_bus.insert(self.pit.clone(), base, len).unwrap();
    }
    // PIC port ranges on the I/O bus.
    for (base, len) in [(0x20, 0x2), (0xa0, 0x2), (0x4d0, 0x2)] {
        io_bus.insert(self.pic.clone(), base, len).unwrap();
    }
    // IOAPIC register window on the MMIO bus.
    mmio_bus
        .insert(
            self.ioapic.clone(),
            IOAPIC_BASE_ADDRESS,
            IOAPIC_MEM_LENGTH_BYTES,
        )
        .unwrap();

    // One list of resample events per pin, built separately for the IOAPIC and the PIC.
    let pin_count = self.ioapic_pins;
    let mut ioapic_resample_events: Vec<Vec<Event>> =
        std::iter::repeat_with(Vec::new).take(pin_count).collect();
    let mut pic_resample_events: Vec<Vec<Event>> =
        std::iter::repeat_with(Vec::new).take(pin_count).collect();
    {
        let irq_events = self.irq_events.lock();
        for evt in irq_events.iter().flatten() {
            let pin = evt.gsi as usize;
            if pin < pin_count {
                if let Some(resample_evt) = &evt.resample_event {
                    ioapic_resample_events[pin].push(resample_evt.try_clone()?);
                    pic_resample_events[pin].push(resample_evt.try_clone()?);
                }
            }
        }
    }

    self.ioapic
        .lock()
        .register_resample_events(ioapic_resample_events);
    self.pic
        .lock()
        .register_resample_events(pic_resample_events);

    // Consume IRQ numbers from the allocator until it has moved past the IOAPIC pin range.
    loop {
        let irq_num = resources.allocate_irq().unwrap();
        if irq_num >= self.ioapic_pins as u32 {
            break;
        }
    }
    Ok(())
}
/// Retries the IOAPIC servicing that `service_irq_event` had to defer.
///
/// Each queued event index is removed from the delayed list once the IOAPIC lock could be taken
/// and the interrupt was serviced; it stays queued when the lock is still busy or the event slot
/// has been cleared. When the list ends up empty, the delayed trigger is waited on (presumably
/// to clear its pending signal; confirm against `Event::wait`).
///
/// Panics when a queued index is out of range for `irq_events`.
fn process_delayed_irq_events(&mut self) -> Result<()> {
self.delayed_ioapic_irq_events
.lock()
// The closure returns `true` to keep an entry queued and `false` to drop it.
.retain(|&event_index| {
if let Some(evt) = &self.irq_events.lock()[event_index] {
if let Ok(mut ioapic) = self.ioapic.try_lock() {
// NOTE(review): the GSI is used directly as the IOAPIC pin here, whereas
// `service_irq_event` uses the pin from the routing table; confirm they always agree.
ioapic.service_irq(evt.gsi as usize, true);
if evt.resample_event.is_none() {
ioapic.service_irq(evt.gsi as usize, false);
}
false
} else {
true
}
} else {
true
}
});
if self.delayed_ioapic_irq_events.lock().is_empty() {
self.delayed_ioapic_irq_trigger.wait()?;
}
Ok(())
}
/// Returns a clone of the event that is signalled when IOAPIC servicing gets deferred.
fn irq_delayed_event_token(&self) -> Result<Option<Event>> {
    self.delayed_ioapic_irq_trigger.try_clone().map(Some)
}
/// Reports whether this chip supports capability `c`.
///
/// `X2Apic` and `MpStateGetSet` are always supported; `TscDeadlineTimer` is supported when the
/// hypervisor reports that capability.
fn check_capability(&self, c: IrqChipCap) -> bool {
    match c {
        IrqChipCap::X2Apic | IrqChipCap::MpStateGetSet => true,
        IrqChipCap::TscDeadlineTimer => {
            let hypervisor = self.vm.get_hypervisor();
            hypervisor.check_capability(HypervisorCap::TscDeadlineTimer)
        }
    }
}
}
/// Serialized form of the chip-specific state of a `KvmSplitIrqChip`.
#[derive(Serialize, Deserialize)]
struct KvmSplitIrqChipSnapshot {
/// Copy of the routing table at snapshot time.
routes: Vec<IrqRoute>,
}
impl IrqChipX86_64 for KvmSplitIrqChip {
fn try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>> {
Ok(Box::new(self.try_clone()?))
}
fn as_irq_chip(&self) -> &dyn IrqChip {
self
}
fn as_irq_chip_mut(&mut self) -> &mut dyn IrqChip {
self
}
fn get_pic_state(&self, select: PicSelect) -> Result<PicState> {
Ok(self.pic.lock().get_pic_state(select))
}
fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()> {
self.pic.lock().set_pic_state(select, state);
Ok(())
}
fn get_ioapic_state(&self) -> Result<IoapicState> {
Ok(self.ioapic.lock().get_ioapic_state())
}
fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()> {
self.ioapic.lock().set_ioapic_state(state);
Ok(())
}
fn get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
match self.vcpus.lock().get(vcpu_id) {
Some(Some(vcpu)) => Ok(LapicState::from(&vcpu.get_lapic()?)),
_ => Err(Error::new(libc::ENOENT)),
}
}
fn set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
match self.vcpus.lock().get(vcpu_id) {
Some(Some(vcpu)) => vcpu.set_lapic(&kvm_lapic_state::from(state)),
_ => Err(Error::new(libc::ENOENT)),
}
}
fn lapic_frequency(&self) -> u32 {
1_000_000_000
}
fn get_pit(&self) -> Result<PitState> {
Ok(self.pit.lock().get_pit_state())
}
fn set_pit(&mut self, state: &PitState) -> Result<()> {
self.pit.lock().set_pit_state(state);
Ok(())
}
fn pit_uses_speaker_port(&self) -> bool {
true
}
fn snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value> {
serde_json::to_value(KvmSplitIrqChipSnapshot {
routes: self.routes.lock().clone(),
})
.context("failed to serialize KvmSplitIrqChip")
}
fn restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
let deser: KvmSplitIrqChipSnapshot =
serde_json::from_value(data).context("failed to deserialize KvmSplitIrqChip")?;
self.set_irq_routes(&deser.routes)?;
Ok(())
}
}