hypervisor/kvm/mod.rs

// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#[cfg(target_arch = "aarch64")]
mod aarch64;
#[cfg(target_arch = "riscv64")]
mod riscv64;
#[cfg(target_arch = "x86_64")]
mod x86_64;

mod cap;

use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryFrom;
use std::ffi::CString;
use std::fs::File;
use std::os::raw::c_ulong;
use std::os::raw::c_void;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::sync::Arc;
use std::sync::OnceLock;

#[cfg(target_arch = "aarch64")]
pub use aarch64::*;
use base::errno_result;
use base::error;
use base::ioctl;
use base::ioctl_with_mut_ref;
use base::ioctl_with_ref;
use base::ioctl_with_val;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::FromRawDescriptor;
use base::MappedRegion;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
pub use cap::KvmCap;
use cfg_if::cfg_if;
use data_model::vec_with_array_field;
use kvm_sys::*;
use libc::open64;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOSYS;
#[cfg(not(target_arch = "aarch64"))]
use libc::ENOTSUP;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
#[cfg(target_arch = "riscv64")]
use riscv64::*;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

use crate::BalloonEvent;
use crate::ClockState;
use crate::Config;
use crate::Datamatch;
use crate::DeviceKind;
use crate::HypercallAbi;
use crate::Hypervisor;
use crate::HypervisorCap;
use crate::HypervisorKind;
use crate::IoEventAddress;
use crate::IoOperation;
use crate::IoParams;
use crate::IrqRoute;
use crate::IrqSource;
use crate::MPState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::Vcpu;
use crate::VcpuExit;
use crate::VcpuSignalHandle;
use crate::VcpuSignalHandleInner;
use crate::Vm;
use crate::VmCap;

// Wrapper around KVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping
// from guest physical to host user pages.
//
// SAFETY:
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    kvm: &KvmVm,
    slot: MemSlot,
    read_only: bool,
    log_dirty_pages: bool,
    cache: MemCacheType,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut use_2_variant = false;
    let mut flags = 0;
    if read_only {
        flags |= KVM_MEM_READONLY;
    }
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if kvm.caps.user_noncoherent_dma && cache == MemCacheType::CacheNonCoherent {
        flags |= KVM_MEM_NON_COHERENT_DMA;
        use_2_variant = kvm.caps.user_memory_region2;
    }

    let untagged_userspace_addr = untagged_addr(userspace_addr as usize);
    let ret = if use_2_variant {
        let region2 = kvm_userspace_memory_region2 {
            slot,
            flags,
            guest_phys_addr: guest_addr,
            memory_size,
            userspace_addr: untagged_userspace_addr as u64,
            guest_memfd_offset: 0,
            guest_memfd: 0,
            ..Default::default()
        };
        ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION2, &region2)
    } else {
        let region = kvm_userspace_memory_region {
            slot,
            flags,
            guest_phys_addr: guest_addr,
            memory_size,
            userspace_addr: (untagged_userspace_addr as u64),
        };
        ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION, &region)
    };

    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

// https://github.com/torvalds/linux/blob/master/Documentation/virt/kvm/api.rst
// On architectures that support a form of address tagging, userspace_addr must be an untagged
// address.
#[inline]
fn untagged_addr(addr: usize) -> usize {
    let tag_bits_mask: u64 = if cfg!(target_arch = "aarch64") {
        0xFF00000000000000
    } else {
        0
    };
    addr & !tag_bits_mask as usize
}
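
// Illustrative example (added for clarity, assuming aarch64 top-byte tagging): a tagged pointer
// value such as 0x5A00_7F12_3456_7000 would be masked down to 0x0000_7F12_3456_7000 before being
// handed to KVM_SET_USER_MEMORY_REGION; on non-aarch64 targets the address is returned unchanged.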

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
/// size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    size.div_ceil(page_size).div_ceil(8)
}
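
// Worked example (illustrative, not from the original source; assumes a 4 KiB host page size):
// a 1 MiB region spans 256 pages, and one dirty bit per page packs into 256 / 8 = 32 bytes, so
// dirty_log_bitmap_size(1 << 20) would return 32.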

pub struct Kvm {
    kvm: SafeDescriptor,
    vcpu_mmap_size: usize,
}

impl Kvm {
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY:
        // Safe because we verify that ret is valid and we own the fd.
        let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };

        // SAFETY:
        // Safe because we know that the descriptor is valid and we verify the return result.
        let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION) };
        if version < 0 {
            return errno_result();
        }

        // Per the kernel KVM API documentation: "Applications should refuse to run if
        // KVM_GET_API_VERSION returns a value other than 12."
        if version as u32 != KVM_API_VERSION {
            error!(
                "KVM_GET_API_VERSION: expected {}, got {}",
                KVM_API_VERSION, version,
            );
            return Err(Error::new(ENOSYS));
        }

        // SAFETY:
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(&kvm, KVM_GET_VCPU_MMAP_SIZE) };
        if res <= 0 {
            return errno_result();
        }
        let vcpu_mmap_size = res as usize;

        Ok(Kvm {
            kvm,
            vcpu_mmap_size,
        })
    }

    /// Opens `/dev/kvm` and returns a Kvm object on success.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }
}
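
// Illustrative usage sketch (not part of the original file): open the default KVM device and
// probe a capability before building a VM.
//
//     let kvm = Kvm::new()?;
//     if !kvm.check_capability(HypervisorCap::UserMemory) {
//         // bail out: KVM_CAP_USER_MEMORY is required
//     }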

impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}

impl Hypervisor for Kvm {
    fn try_clone(&self) -> Result<Self> {
        Ok(Kvm {
            kvm: self.kvm.try_clone()?,
            vcpu_mmap_size: self.vcpu_mmap_size,
        })
    }

    fn check_capability(&self, cap: HypervisorCap) -> bool {
        if let Ok(kvm_cap) = KvmCap::try_from(cap) {
            // SAFETY:
            // this ioctl is safe because we know this kvm descriptor is valid,
            // and we are copying over the kvm capability (u32) as a c_ulong value.
            unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, kvm_cap as c_ulong) == 1 }
        } else {
            // this capability cannot be converted on this platform, so return false
            false
        }
    }
}

/// Storage for constant KVM driver caps
#[derive(Clone, Default)]
struct KvmVmCaps {
    kvmclock_ctrl: bool,
    user_noncoherent_dma: bool,
    user_memory_region2: bool,
    // This capability can't be detected until after the irqchip is configured, so we lazy
    // initialize it when the first MSI is configured.
    msi_devid: Arc<OnceLock<bool>>,
}

/// A wrapper around creating and using a KVM VM.
pub struct KvmVm {
    kvm: Kvm,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
    /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    caps: KvmVmCaps,
    force_disable_readonly_mem: bool,
}

impl KvmVm {
    /// Constructs a new `KvmVm` using the given `Kvm` instance.
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
        // SAFETY:
        // Safe because we know kvm is a real kvm fd as this module is the only one that can make
        // Kvm objects.
        let ret = unsafe {
            ioctl_with_val(
                kvm,
                KVM_CREATE_VM,
                kvm.get_vm_type(cfg.protection_type)? as c_ulong,
            )
        };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY:
        // Safe because we verify that ret is valid and we own the fd.
        let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
        let mut vm = KvmVm {
            kvm: kvm.try_clone()?,
            vm: vm_descriptor,
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            caps: Default::default(),
            force_disable_readonly_mem: cfg.force_disable_readonly_mem,
        };
        vm.caps.kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
        vm.caps.user_noncoherent_dma = vm.check_raw_capability(KvmCap::MemNoncoherentDma);
        vm.caps.user_memory_region2 = vm.check_raw_capability(KvmCap::UserMemory2);

        vm.init_arch(&cfg)?;

        for region in vm.guest_mem.regions() {
            // SAFETY:
            // Safe because the guest regions are guaranteed not to overlap.
            unsafe {
                set_user_memory_region(
                    &vm,
                    region.index as MemSlot,
                    false,
                    false,
                    MemCacheType::CacheCoherent,
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }?;
        }

        Ok(vm)
    }

    pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU, c_ulong::try_from(id).unwrap()) };
        if fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
        // the value of the fd and we own the fd.
        let vcpu = unsafe { File::from_raw_descriptor(fd) };

        // The VCPU mapping is held by an `Arc` inside `KvmVcpu`, and it can also be cloned by
        // `signal_handle()` for use in `KvmVcpuSignalHandle`. The mapping will not be destroyed
        // until all references are dropped, so it is safe to reference `kvm_run` fields via the
        // `as_ptr()` function during either type's lifetime.
        let run_mmap = MemoryMappingBuilder::new(self.kvm.vcpu_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            cap_kvmclock_ctrl: self.caps.kvmclock_ctrl,
            run_mmap: Arc::new(run_mmap),
        })
    }
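
    // Illustrative construction flow (a sketch, not from the original source; `guest_mem` and
    // `cfg` are values the caller is assumed to have built already): create a VM from an open
    // Kvm handle and then bring up vcpu 0.
    //
    //     let vm = KvmVm::new(&kvm, guest_mem, cfg)?;
    //     let vcpu0 = vm.create_kvm_vcpu(0)?;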

    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event that will, when signalled, trigger the `gsi` irq; `resample_evt`
    /// (when not `None`) will be triggered when the irqchip is resampled.
    pub fn register_irqfd(
        &self,
        gsi: u32,
        evt: &Event,
        resample_evt: Option<&Event>,
    ) -> Result<()> {
        let mut irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };

        if let Some(r_evt) = resample_evt {
            irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
        }

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Unregisters an event that was previously registered with
    /// `register_irqfd`.
    ///
    /// The `evt` and `gsi` pair must be the same as the ones passed into
    /// `register_irqfd`.
    pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
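
    // Illustrative usage sketch (assumes an in-kernel irqchip has already been created via
    // create_irq_chip(); GSI 3 is an arbitrary example): route an Event to a GSI through an
    // irqfd, then deassign it later.
    //
    //     let intr = Event::new()?;
    //     vm.register_irqfd(3, &intr, None)?;
    //     // ... signal `intr` from a device thread to inject the interrupt ...
    //     vm.unregister_irqfd(3, &intr)?;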

    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        let cap_msi_devid = *self
            .caps
            .msi_devid
            .get_or_init(|| self.check_raw_capability(KvmCap::MsiDevid));

        // SAFETY:
        // Safe because we ensured there is enough space in irq_routing to hold the number of
        // route entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            *irq_route = to_kvm_irq_routing_entry(route, cap_msi_devid);
        }

        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
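
    // Illustrative sketch (an x86-style MSI route; the GSI, address, and data values are
    // assumptions for the example, and field availability follows crate::IrqRoute/IrqSource):
    // replace the routing table with a single MSI entry for GSI 5.
    //
    //     vm.set_gsi_routing(&[IrqRoute {
    //         gsi: 5,
    //         source: IrqSource::Msi { address: 0xfee0_0000, data: 0x0020 },
    //     }])?;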

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoEventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoEventAddress::Pio(p) => p,
                IoEventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Checks whether a particular KVM-specific capability is available for this VM.
    pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
        // SAFETY:
        // Safe because we know that our file is a KVM fd, and if the cap is invalid KVM assumes
        // it's an unavailable extension and returns 0.
        let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, capability as c_ulong) };
        match capability {
            #[cfg(target_arch = "x86_64")]
            KvmCap::BusLockDetect => {
                if ret > 0 {
                    ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
                } else {
                    false
                }
            }
            _ => ret == 1,
        }
    }

    // Currently only used on aarch64, but works on any architecture.
    #[allow(dead_code)]
    /// Enables a KVM-specific capability for this VM, with the given arguments.
    ///
    /// # Safety
    /// This function is marked as unsafe because `args` may be interpreted as pointers for some
    /// capabilities. The caller must ensure that any pointers passed in the `args` array are
    /// allocated as the kernel expects, and that mutable pointers are owned.
    unsafe fn enable_raw_capability(
        &self,
        capability: KvmCap,
        flags: u32,
        args: &[u64; 4],
    ) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap: capability as u32,
            args: *args,
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct, and because we assume the caller has allocated the args appropriately.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        match if self.guest_mem.use_punchhole_locked() {
            self.guest_mem.punch_hole_range(guest_address, size)
        } else {
            self.guest_mem.remove_range(guest_address, size)
        } {
            Ok(_) => Ok(()),
            Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
        // No-op: when the guest attempts to access the pages again, Linux/KVM will provide them.
        Ok(())
    }
}

impl Vm for KvmVm {
    fn try_clone(&self) -> Result<Self> {
        Ok(KvmVm {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            caps: self.caps.clone(),
            force_disable_readonly_mem: self.force_disable_readonly_mem,
        })
    }

    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        self.vm.try_clone()
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Kvm
    }

    fn check_capability(&self, c: VmCap) -> bool {
        if let Some(val) = self.check_capability_arch(c) {
            return val;
        }
        match c {
            #[cfg(target_arch = "aarch64")]
            VmCap::ArmPmuV3 => self.check_raw_capability(KvmCap::ArmPmuV3),
            VmCap::DirtyLog => true,
            VmCap::PvClock => false,
            VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
            VmCap::EarlyInitCpuid => false,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
            VmCap::ReadOnlyMemoryRegion => {
                !self.force_disable_readonly_mem && self.check_raw_capability(KvmCap::ReadonlyMem)
            }
            VmCap::MemNoncoherentDma => {
                cfg!(feature = "noncoherent-dma")
                    && self.check_raw_capability(KvmCap::MemNoncoherentDma)
            }
            #[cfg(target_arch = "aarch64")]
            VmCap::Sve => self.check_raw_capability(KvmCap::Sve),
        }
    }

    fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
        match c {
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => {
                let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
                Ok(
                    // TODO(b/315998194): Add safety comment
                    #[allow(clippy::undocumented_unsafe_blocks)]
                    unsafe {
                        self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
                    },
                )
            }
            _ => Ok(false),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        self.kvm.get_guest_phys_addr_bits()
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        cache: MemCacheType,
    ) -> Result<MemSlot> {
        let pgsz = pagesize() as u64;
        // KVM requires the user memory region size to be page-size aligned. It is safe to round
        // mem.size() up to page-size alignment because mmap rounds a non-aligned size up to the
        // next page boundary anyway.
        let size = (mem.size() as u64).next_multiple_of(pgsz);
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // SAFETY:
        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                self,
                slot,
                read_only,
                log_dirty_pages,
                cache,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }
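
    // Illustrative sketch (assumes `MemoryMapping` from `base` implements `MappedRegion`, and the
    // guest-physical address and size are arbitrary examples): back 16 MiB of extra guest RAM at
    // GPA 0x8000_0000 with an anonymous host mapping.
    //
    //     let mmap = MemoryMappingBuilder::new(16 << 20).build()?;
    //     let slot = vm.add_memory_region(
    //         GuestAddress(0x8000_0000),
    //         Box::new(mmap),
    //         /* read_only= */ false,
    //         /* log_dirty_pages= */ false,
    //         MemCacheType::CacheCoherent,
    //     )?;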

    fn enable_hypercalls(&mut self, nr: u64, count: usize) -> Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                let base = u32::try_from(nr).unwrap();
                let nr_functions = u32::try_from(count).unwrap();
                self.enable_smccc_forwarding(base, nr_functions)
            } else {
                let _ = nr;
                let _ = count;
                Err(Error::new(ENOTSUP))
            }
        }
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn madvise_pageout_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_PAGEOUT)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn madvise_remove_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_REMOVE)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY:
        // Safe because the slot is checked against the list of memory slots.
        unsafe {
            set_user_memory_region(
                self,
                slot,
                false,
                false,
                MemCacheType::CacheCoherent,
                0,
                0,
                std::ptr::null_mut(),
            )?;
        }
        self.mem_slot_gaps.lock().push(Reverse(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
        let mut device = if let Some(dev) = self.get_device_params_arch(kind) {
            dev
        } else {
            match kind {
                DeviceKind::Vfio => kvm_create_device {
                    type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
                    fd: 0,
                    flags: 0,
                },

                // ARM and RISC-V have additional DeviceKinds, so they need the catch-all pattern.
                #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
                _ => return Err(Error::new(libc::ENXIO)),
            }
        };

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { base::ioctl_with_mut_ref(self, KVM_CREATE_DEVICE, &mut device) };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe because we verify that ret is valid and we own the fd.
                unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) },
            )
        } else {
            errno_result()
        }
    }

    fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
        // Ensure that dirty_log is large enough to hold one bit for every page in the mmap.
        if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
            return Err(Error::new(EINVAL));
        }

        let mut dirty_log_kvm = kvm_dirty_log {
            slot,
            ..Default::default()
        };
        dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
        // SAFETY:
        // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid (because
        // it's from a slice) and we checked that it will be large enough to hold the entire log.
        let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
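
    // Illustrative sketch: size the bitmap with dirty_log_bitmap_size() and fetch the dirty log
    // for a slot that was registered with log_dirty_pages = true (`slot` and `region_size` are
    // assumptions of the caller, not defined in this file).
    //
    //     let mut bitmap = vec![0u8; dirty_log_bitmap_size(region_size)];
    //     vm.get_dirty_log(slot, &mut bitmap)?;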

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }
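
    // Illustrative sketch: have KVM signal `evt` whenever the guest writes the byte 0x1 to an
    // MMIO doorbell at 0x1000 (the address and datamatch value are assumptions for the example).
    //
    //     let evt = Event::new()?;
    //     vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::U8(Some(1)))?;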

    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        // KVM delivers IO events in-kernel with ioeventfds, so this is a no-op
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        self.get_pvclock_arch()
    }

    fn set_pvclock(&self, state: &ClockState) -> Result<()> {
        self.set_pvclock_arch(state)
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }
}

impl AsRawDescriptor for KvmVm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

struct KvmVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}

impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
    fn signal_immediate_exit(&self) {
        // SAFETY: we ensure `run_mmap` is a valid mapping of `kvm_run` at creation time, and the
        // `Arc` ensures the mapping still exists while we hold a reference to it.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut kvm_run;
            (*run).immediate_exit = 1;
        }
    }
}

/// A wrapper around using a KVM Vcpu.
pub struct KvmVcpu {
    kvm: Kvm,
    vm: SafeDescriptor,
    vcpu: File,
    id: usize,
    cap_kvmclock_ctrl: bool,
    run_mmap: Arc<MemoryMapping>,
}

impl Vcpu for KvmVcpu {
    fn try_clone(&self) -> Result<Self> {
        let vm = self.vm.try_clone()?;
        let vcpu = self.vcpu.try_clone()?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm,
            vcpu,
            cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
            id: self.id,
            run_mmap: self.run_mmap.clone(),
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    fn id(&self) -> usize {
        self.id
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn set_immediate_exit(&self, exit: bool) {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }

    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(KvmVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    fn on_suspend(&self) -> Result<()> {
        // On KVM implementations that use a paravirtualized clock (e.g. x86), a flag must be set to
        // indicate to the guest kernel that a vCPU was suspended. The guest kernel will use this
        // flag to prevent the soft lockup detection from triggering when this vCPU resumes, which
        // could happen days later in realtime.
        if self.cap_kvmclock_ctrl {
            // SAFETY:
            // The ioctl is safe because it does not read or write memory in this process.
            if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL) } != 0 {
                // Even if the host kernel supports the capability, it may not be configured by
                // the guest - for example, when the guest kernel offlines a CPU.
                if Error::last().errno() != libc::EINVAL {
                    return errno_result();
                }
            }
        }

        Ok(())
    }

    unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap,
            args: *args,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct, and because we assume the caller has allocated the args appropriately.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
    // allow attribute.
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };

        // Check for architecture-specific VM exit reasons first in case the architecture wants to
        // override the default handling.
        if let Some(vcpu_exit) = self.handle_vm_exit_arch(run) {
            return Ok(vcpu_exit);
        }

        match run.exit_reason {
            KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
            KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
            KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
            KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
            KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
            KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown(Ok(()))),
            KVM_EXIT_FAIL_ENTRY => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hardware_entry_failure_reason = unsafe {
                    run.__bindgen_anon_1
                        .fail_entry
                        .hardware_entry_failure_reason
                };
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                })
            }
            KVM_EXIT_INTR => Ok(VcpuExit::Intr),
            KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
            KVM_EXIT_SYSTEM_EVENT => {
                // SAFETY:
                // Safe because we know the exit reason told us this union
                // field is valid
                let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                let event_flags =
                    // SAFETY:
                    // Safe because we know the exit reason told us this union
                    // field is valid
                    unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                match event_type {
                    KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
                    KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
                    KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
                    _ => {
                        error!(
                            "Unknown KVM system event {} with flags {}",
                            event_type, event_flags
                        );
                        Err(Error::new(EINVAL))
                    }
                }
            }
            r => panic!("unknown kvm exit reason: {r}"),
        }
    }
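
    // Illustrative sketch of a minimal dispatch loop built on run() (the empty closure stands in
    // for device dispatch, which is an assumption of the caller and not part of this file):
    //
    //     loop {
    //         match vcpu.run()? {
    //             VcpuExit::Mmio => vcpu.handle_mmio(&mut |_params| Ok(()))?,
    //             VcpuExit::Shutdown(_) => break,
    //             _ => {}
    //         }
    //     }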

    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Verify that the handler is called in the right context.
        assert!(run.exit_reason == KVM_EXIT_MMIO);
        // SAFETY:
        // Safe because the exit_reason (which comes from the kernel) told us which
        // union field to use.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let data = &mut mmio.data[..mmio.len as usize];
        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Write(data),
            })
        } else {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Read(data),
            })
        }
    }

    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Verify that the handler is called in the right context.
        assert!(run.exit_reason == KVM_EXIT_IO);
        // SAFETY:
        // Safe because the exit_reason (which comes from the kernel) told us which
        // union field to use.
        let io = unsafe { run.__bindgen_anon_1.io };
        let address = u64::from(io.port);
        let size = usize::from(io.size);
        let count = io.count as usize;
        let data_len = count * size;
        let data_offset = io.data_offset as usize;
        assert!(data_offset + data_len <= self.run_mmap.size());

        // SAFETY:
        // The data_offset is defined by the kernel to be some number of bytes into the kvm_run
        // structure, which we have fully mmap'd.
        let buffer: &mut [u8] = unsafe {
            std::slice::from_raw_parts_mut(
                (run as *mut kvm_run as *mut u8).add(data_offset),
                data_len,
            )
        };
        let data_chunks = buffer.chunks_mut(size);

        if io.direction == KVM_EXIT_IO_IN as u8 {
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                });
            }
        } else {
            debug_assert_eq!(io.direction, KVM_EXIT_IO_OUT as u8);
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                });
            }
        }

        Ok(())
    }

    fn handle_hypercall(
        &self,
        handle_fn: &mut dyn FnMut(&mut HypercallAbi) -> anyhow::Result<()>,
    ) -> anyhow::Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                // Assume that all handled HVC/SMC calls follow the SMCCC.
                self.handle_smccc_call(handle_fn)
            } else {
                let _ = handle_fn;
                unimplemented!("KvmVcpu::handle_hypercall() not supported");
            }
        }
    }
}

impl KvmVcpu {
    /// Gets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // SAFETY: trivially safe
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        let ret = {
            // SAFETY:
            // Safe because we know that our file is a VCPU fd, we know the kernel will only write
            // the correct amount of memory to our pointer, and we verify the return
            // result.
            unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }

    /// Sets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
            unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}
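
// Illustrative sketch (a minimal round-trip, assuming the in-kernel irqchip already exists):
// read a vcpu's MP state, convert it to the hypervisor-agnostic type, and write it back.
//
//     let raw = vcpu.get_mp_state()?;
//     let state = MPState::from(&raw);
//     vcpu.set_mp_state(&kvm_mp_state::from(&state))?;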

impl AsRawDescriptor for KvmVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

impl TryFrom<HypervisorCap> for KvmCap {
    type Error = Error;

    fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
        match cap {
            HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
            HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
        }
    }
}

fn to_kvm_irq_routing_entry(item: &IrqRoute, cap_msi_devid: bool) -> kvm_irq_routing_entry {
    match &item.source {
        IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
            gsi: item.gsi,
            type_: KVM_IRQ_ROUTING_IRQCHIP,
            u: kvm_irq_routing_entry__bindgen_ty_1 {
                irqchip: kvm_irq_routing_irqchip {
                    irqchip: chip_to_kvm_chip(*chip),
                    pin: *pin,
                },
            },
            ..Default::default()
        },
        IrqSource::Msi {
            address,
            data,
            #[cfg(target_arch = "aarch64")]
            pci_address,
        } => {
            // Even though we always pass the device ID along to this point, KVM docs say: "If this
            // capability is not available, userspace should never set the KVM_MSI_VALID_DEVID flag
            // as the ioctl might fail"
            let devid = if cap_msi_devid {
                #[cfg(not(target_arch = "aarch64"))]
                panic!("unexpected KVM_CAP_MSI_DEVID");
                #[cfg(target_arch = "aarch64")]
                Some(pci_address.to_u32())
            } else {
                None
            };
            kvm_irq_routing_entry {
                gsi: item.gsi,
                type_: KVM_IRQ_ROUTING_MSI,
                flags: if devid.is_some() {
                    KVM_MSI_VALID_DEVID
                } else {
                    0
                },
                u: kvm_irq_routing_entry__bindgen_ty_1 {
                    msi: kvm_irq_routing_msi {
                        address_lo: *address as u32,
                        address_hi: (*address >> 32) as u32,
                        data: *data,
                        __bindgen_anon_1: kvm_irq_routing_msi__bindgen_ty_1 {
                            devid: devid.unwrap_or_default(),
                        },
                    },
                },
                ..Default::default()
            }
        }
    }
}

impl From<&kvm_mp_state> for MPState {
    fn from(item: &kvm_mp_state) -> Self {
        match item.mp_state {
            KVM_MP_STATE_RUNNABLE => MPState::Runnable,
            KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
            KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
            KVM_MP_STATE_HALTED => MPState::Halted,
            KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
            KVM_MP_STATE_STOPPED => MPState::Stopped,
            state => {
                error!(
                    "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
                    state
                );
                MPState::Runnable
            }
        }
    }
}

impl From<&MPState> for kvm_mp_state {
    fn from(item: &MPState) -> Self {
        kvm_mp_state {
            mp_state: match item {
                MPState::Runnable => KVM_MP_STATE_RUNNABLE,
                MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
                MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
                MPState::Halted => KVM_MP_STATE_HALTED,
                MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
                MPState::Stopped => KVM_MP_STATE_STOPPED,
            },
        }
    }
}