1#[cfg(target_arch = "aarch64")]
6mod aarch64;
7#[cfg(target_arch = "riscv64")]
8mod riscv64;
9#[cfg(target_arch = "x86_64")]
10mod x86_64;
11
12mod cap;
13
14use std::cmp::Reverse;
15use std::collections::BTreeMap;
16use std::collections::BinaryHeap;
17use std::convert::TryFrom;
18use std::ffi::CString;
19use std::fs::File;
20use std::os::raw::c_ulong;
21use std::os::raw::c_void;
22use std::os::unix::prelude::OsStrExt;
23use std::path::Path;
24use std::sync::Arc;
25use std::sync::OnceLock;
26
27#[cfg(target_arch = "aarch64")]
28pub use aarch64::*;
29use base::errno_result;
30use base::error;
31use base::ioctl;
32use base::ioctl_with_mut_ref;
33use base::ioctl_with_ref;
34use base::ioctl_with_val;
35use base::pagesize;
36use base::AsRawDescriptor;
37use base::Error;
38use base::Event;
39use base::FromRawDescriptor;
40use base::MappedRegion;
41use base::MemoryMapping;
42use base::MemoryMappingBuilder;
43use base::MmapError;
44use base::Protection;
45use base::RawDescriptor;
46use base::Result;
47use base::SafeDescriptor;
48pub use cap::KvmCap;
49use cfg_if::cfg_if;
50use kvm_sys::*;
51use libc::open64;
52use libc::EFAULT;
53use libc::EINVAL;
54use libc::EIO;
55use libc::ENOENT;
56use libc::ENOSPC;
57use libc::ENOSYS;
58#[cfg(not(target_arch = "aarch64"))]
59use libc::ENOTSUP;
60use libc::EOVERFLOW;
61use libc::O_CLOEXEC;
62use libc::O_RDWR;
63#[cfg(target_arch = "riscv64")]
64use riscv64::*;
65use sync::Mutex;
66use vm_memory::GuestAddress;
67use vm_memory::GuestMemory;
68#[cfg(target_arch = "x86_64")]
69pub use x86_64::*;
70use zerocopy::FromZeros;
71
72use crate::BalloonEvent;
73use crate::ClockState;
74use crate::Config;
75use crate::Datamatch;
76use crate::DeviceKind;
77use crate::HypercallAbi;
78use crate::Hypervisor;
79use crate::HypervisorCap;
80use crate::HypervisorKind;
81use crate::IoEventAddress;
82use crate::IoOperation;
83use crate::IoParams;
84use crate::IrqRoute;
85use crate::IrqSource;
86use crate::MPState;
87use crate::MemCacheType;
88use crate::MemSlot;
89use crate::Vcpu;
90use crate::VcpuExit;
91use crate::VcpuSignalHandle;
92use crate::VcpuSignalHandleInner;
93use crate::Vm;
94use crate::VmCap;
95
96unsafe fn set_user_memory_region(
102 kvm: &KvmVm,
103 slot: MemSlot,
104 read_only: bool,
105 log_dirty_pages: bool,
106 cache: MemCacheType,
107 guest_addr: u64,
108 memory_size: u64,
109 userspace_addr: *mut u8,
110) -> Result<()> {
111 let mut use_2_variant = false;
112 let mut flags = 0;
113 if read_only {
114 flags |= KVM_MEM_READONLY;
115 }
116 if log_dirty_pages {
117 flags |= KVM_MEM_LOG_DIRTY_PAGES;
118 }
119 if kvm.caps.user_noncoherent_dma && cache == MemCacheType::CacheNonCoherent {
120 flags |= KVM_MEM_NON_COHERENT_DMA;
121 use_2_variant = kvm.caps.user_memory_region2;
122 }
123
124 let untagged_userspace_addr = untagged_addr(userspace_addr as usize);
125 let ret = if use_2_variant {
126 let region2 = kvm_userspace_memory_region2 {
127 slot,
128 flags,
129 guest_phys_addr: guest_addr,
130 memory_size,
131 userspace_addr: untagged_userspace_addr as u64,
132 guest_memfd_offset: 0,
133 guest_memfd: 0,
134 ..Default::default()
135 };
136 ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION2, ®ion2)
137 } else {
138 let region = kvm_userspace_memory_region {
139 slot,
140 flags,
141 guest_phys_addr: guest_addr,
142 memory_size,
143 userspace_addr: (untagged_userspace_addr as u64),
144 };
145 ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION, ®ion)
146 };
147
148 if ret == 0 {
149 Ok(())
150 } else {
151 errno_result()
152 }
153}
154
#[inline]
/// Clears any pointer-tag bits from a host address.
///
/// On aarch64 the top byte of a pointer may carry a tag (top-byte-ignore);
/// KVM expects an untagged address. On other architectures this is a no-op.
fn untagged_addr(addr: usize) -> usize {
    let tag_bits_mask: u64 = match cfg!(target_arch = "aarch64") {
        true => 0xFF00_0000_0000_0000,
        false => 0,
    };
    // Unary `!` binds tighter than `as`, so this complements the u64 mask
    // first and then narrows; parenthesized here for clarity.
    addr & ((!tag_bits_mask) as usize)
}
167
168pub fn dirty_log_bitmap_size(size: usize) -> usize {
175 let page_size = pagesize();
176 size.div_ceil(page_size).div_ceil(8)
177}
178
/// A handle to an open `/dev/kvm` (or compatible) device.
pub struct Kvm {
    kvm: SafeDescriptor,
    // Size of each vCPU's shared `kvm_run` mmap area, as reported by
    // KVM_GET_VCPU_MMAP_SIZE.
    vcpu_mmap_size: usize,
}
183
impl Kvm {
    /// Opens the KVM device at `device_path` and validates the API version
    /// and vCPU mmap size before returning a usable handle.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY: the path is a valid NUL-terminated C string and the return
        // value is checked before use.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY: `ret` is a freshly opened, owned fd that we verified above.
        let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };

        // SAFETY: this ioctl takes no arguments and only returns a value.
        let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION) };
        if version < 0 {
            return errno_result();
        }

        // Refuse to run against a kernel speaking a different KVM ABI.
        if version as u32 != KVM_API_VERSION {
            error!(
                "KVM_GET_API_VERSION: expected {}, got {}",
                KVM_API_VERSION, version,
            );
            return Err(Error::new(ENOSYS));
        }

        // SAFETY: this ioctl takes no arguments and only returns a value.
        let res = unsafe { ioctl(&kvm, KVM_GET_VCPU_MMAP_SIZE) };
        if res <= 0 {
            return errno_result();
        }
        let vcpu_mmap_size = res as usize;

        Ok(Kvm {
            kvm,
            vcpu_mmap_size,
        })
    }

    /// Opens `/dev/kvm`, the default KVM device node.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }
}
233
impl AsRawDescriptor for Kvm {
    // Exposes the raw `/dev/kvm` fd for use by the `ioctl` helpers.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}
239
240impl Hypervisor for Kvm {
241 fn try_clone(&self) -> Result<Self> {
242 Ok(Kvm {
243 kvm: self.kvm.try_clone()?,
244 vcpu_mmap_size: self.vcpu_mmap_size,
245 })
246 }
247
248 fn check_capability(&self, cap: HypervisorCap) -> bool {
249 if let Ok(kvm_cap) = KvmCap::try_from(cap) {
250 unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, kvm_cap as c_ulong) == 1 }
254 } else {
255 false
257 }
258 }
259}
260
#[derive(Clone, Default)]
/// KVM capabilities probed once at VM creation and cached for fast access.
struct KvmVmCaps {
    // KVM_CAP_KVMCLOCK_CTRL: lets us tell the kernel a vCPU was paused.
    kvmclock_ctrl: bool,
    // KVM_CAP for non-coherent DMA memory slots.
    user_noncoherent_dma: bool,
    // KVM_SET_USER_MEMORY_REGION2 is available.
    user_memory_region2: bool,
    // Lazily probed (first use in set_gsi_routing); shared across clones.
    msi_devid: Arc<OnceLock<bool>>,
}
271
/// A KVM virtual machine: the VM fd plus bookkeeping for guest memory slots.
pub struct KvmVm {
    kvm: Kvm,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    // Extra (non-boot) memory regions, keyed by their KVM slot number.
    mem_regions: Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>,
    // Min-heap (via `Reverse`) of freed slot numbers available for reuse.
    mem_slot_gaps: Mutex<BinaryHeap<Reverse<MemSlot>>>,
    caps: KvmVmCaps,
    // Config override: report read-only memory regions as unsupported.
    force_disable_readonly_mem: bool,
}
283
impl KvmVm {
    /// Creates a VM on `kvm`, probes its capabilities, runs per-arch init,
    /// and registers every region of `guest_mem` with the kernel.
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
        // SAFETY: the ioctl returns a new fd; the value is checked below
        // before being wrapped.
        let ret = unsafe {
            ioctl_with_val(
                kvm,
                KVM_CREATE_VM,
                kvm.get_vm_type(cfg.protection_type)? as c_ulong,
            )
        };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY: `ret` was verified to be a valid, owned VM fd.
        let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
        let mut vm = KvmVm {
            kvm: kvm.try_clone()?,
            vm: vm_descriptor,
            guest_mem,
            mem_regions: Default::default(),
            mem_slot_gaps: Default::default(),
            caps: Default::default(),
            force_disable_readonly_mem: cfg.force_disable_readonly_mem,
        };
        // Probe capabilities once up front so later paths can branch cheaply.
        vm.caps.kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
        vm.caps.user_noncoherent_dma = vm.check_raw_capability(KvmCap::MemNoncoherentDma);
        vm.caps.user_memory_region2 = vm.check_raw_capability(KvmCap::UserMemory2);

        vm.init_arch(&cfg)?;

        // Register each boot memory region; slots [0, num_regions) are
        // reserved for `guest_mem` (see `add_memory_region`).
        for region in vm.guest_mem.regions() {
            // SAFETY: the backing memory is kept alive by `vm.guest_mem` for
            // the lifetime of the VM.
            unsafe {
                set_user_memory_region(
                    &vm,
                    region.index as MemSlot,
                    false,
                    false,
                    MemCacheType::CacheCoherent,
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }?;
        }

        Ok(vm)
    }

    /// Creates a vCPU with the given `id` and maps its shared `kvm_run` area.
    pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
        // SAFETY: the ioctl returns a new fd; checked below before wrapping.
        let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU, c_ulong::try_from(id).unwrap()) };
        if fd < 0 {
            return errno_result();
        }

        // SAFETY: `fd` was verified to be a valid, owned vCPU fd.
        let vcpu = unsafe { File::from_raw_descriptor(fd) };

        // Map the `kvm_run` communication area used by KVM_RUN; its size
        // comes from KVM_GET_VCPU_MMAP_SIZE at device-open time.
        let run_mmap = MemoryMappingBuilder::new(self.kvm.vcpu_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(KvmVcpu {
            #[cfg(target_arch = "x86_64")]
            kvm: self.kvm.try_clone()?,
            #[cfg(not(target_arch = "riscv64"))]
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            cap_kvmclock_ctrl: self.caps.kvmclock_ctrl,
            run_mmap: Arc::new(run_mmap),
        })
    }

    /// Creates the in-kernel interrupt controller (KVM_CREATE_IRQCHIP).
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY: this ioctl takes no arguments.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level of the in-kernel IRQ line `irq` to `active`.
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY: the ioctl only reads `irq_level`.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Binds `evt` to GSI `gsi` so signaling the event injects the interrupt;
    /// optionally registers `resample_evt` for level-triggered resampling.
    pub fn register_irqfd(
        &self,
        gsi: u32,
        evt: &Event,
        resample_evt: Option<&Event>,
    ) -> Result<()> {
        let mut irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };

        if let Some(r_evt) = resample_evt {
            irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
        }

        // SAFETY: the ioctl only reads `irqfd`; the kernel dups the fds it
        // needs, so the events need not outlive this call.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Removes a previously registered irqfd binding for `gsi`/`evt`.
    pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY: the ioctl only reads `irqfd`.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Replaces the VM's entire GSI routing table with `routes`.
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        // Allocate a zeroed, correctly-sized flexible-array struct.
        let mut irq_routing =
            kvm_irq_routing::<[kvm_irq_routing_entry]>::new_box_zeroed_with_elems(routes.len())
                .unwrap();
        irq_routing.nr = routes.len() as u32;

        // Probed lazily on first use and cached for the VM's lifetime.
        let cap_msi_devid = *self
            .caps
            .msi_devid
            .get_or_init(|| self.check_raw_capability(KvmCap::MsiDevid));

        for (route, irq_route) in routes.iter().zip(irq_routing.entries.iter_mut()) {
            *irq_route = to_kvm_irq_routing_entry(route, cap_msi_devid);
        }

        // SAFETY: the ioctl only reads the routing table, whose entry count
        // matches the `nr` field set above.
        #[allow(clippy::undocumented_unsafe_blocks)]
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &*irq_routing) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers (or, with `deassign`, removes) an ioeventfd that triggers
    /// `evt` on guest writes to `addr` matching `datamatch`.
    fn ioeventfd(
        &self,
        evt: Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        // Decompose the datamatch into (match enabled, value, access length);
        // a `None` payload means "any value of this length".
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
        }
        if let IoEventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoEventAddress::Pio(p) => p,
                IoEventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY: the ioctl only reads `ioeventfd`; the kernel dups the fd,
        // so dropping `evt` afterwards is fine.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Checks a raw KVM extension. Most capabilities report support as a
    /// return value of exactly 1; BusLockDetect instead returns a flag mask.
    pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
        // SAFETY: KVM_CHECK_EXTENSION only reads its argument and returns an
        // integer.
        let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, capability as c_ulong) };
        match capability {
            #[cfg(target_arch = "x86_64")]
            KvmCap::BusLockDetect => {
                if ret > 0 {
                    ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
                } else {
                    false
                }
            }
            _ => ret == 1,
        }
    }

    /// Enables a raw KVM capability on this VM via KVM_ENABLE_CAP.
    ///
    /// # Safety
    ///
    /// The caller must ensure `capability`, `flags`, and `args` form a valid
    /// combination per the KVM API for this capability — TODO confirm exact
    /// per-capability requirements against the kernel documentation.
    #[allow(dead_code)]
    unsafe fn enable_raw_capability(
        &self,
        capability: KvmCap,
        flags: u32,
        args: &[u64; 4],
    ) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap: capability as u32,
            args: *args,
            flags,
            ..Default::default()
        };
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    // Balloon inflate: release the host memory backing the given guest range.
    fn handle_inflate(&self, guest_address: GuestAddress, size: u64) -> Result<()> {
        match self.guest_mem.remove_range(guest_address, size) {
            Ok(_) => Ok(()),
            // Surface the underlying errno when the removal failed in a
            // syscall; collapse other failures to EIO.
            Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    // Balloon deflate: nothing to do — the pages fault back in on demand.
    fn handle_deflate(&self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
        Ok(())
    }
}
604
impl Vm for KvmVm {
    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        self.vm.try_clone()
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Kvm
    }

    fn check_capability(&self, c: VmCap) -> bool {
        // Give the per-arch implementation first say; fall through to the
        // generic answers only when it abstains.
        if let Some(val) = self.check_capability_arch(c) {
            return val;
        }
        match c {
            #[cfg(target_arch = "aarch64")]
            VmCap::ArmPmuV3 => self.check_raw_capability(KvmCap::ArmPmuV3),
            VmCap::DirtyLog => true,
            VmCap::PvClock => false,
            VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
            VmCap::EarlyInitCpuid => false,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
            VmCap::ReadOnlyMemoryRegion => {
                // A config flag can force-disable read-only regions even when
                // the kernel supports them.
                !self.force_disable_readonly_mem && self.check_raw_capability(KvmCap::ReadonlyMem)
            }
            VmCap::MemNoncoherentDma => {
                cfg!(feature = "noncoherent-dma")
                    && self.check_raw_capability(KvmCap::MemNoncoherentDma)
            }
            #[cfg(target_arch = "aarch64")]
            VmCap::Sve => self.check_raw_capability(KvmCap::Sve),
        }
    }

    // Only BusLockDetect is enableable here; everything else reports false.
    fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
        match c {
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => {
                let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
                Ok(
                    // SAFETY: `args` matches the layout KVM_ENABLE_CAP expects
                    // for KVM_CAP_BUS_LOCK_DETECT.
                    #[allow(clippy::undocumented_unsafe_blocks)]
                    unsafe {
                        self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
                    },
                )
            }
            _ => Ok(false),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        self.kvm.get_guest_phys_addr_bits()
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    /// Registers an extra memory region with KVM, allocating a slot number
    /// (reusing freed slots first) and keeping the mapping alive.
    fn add_memory_region(
        &self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        cache: MemCacheType,
    ) -> Result<MemSlot> {
        // KVM requires page-granular regions; round the size up.
        let pgsz = pagesize() as u64;
        let size = (mem.size() as u64).next_multiple_of(pgsz);
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        // Reject overlap with the boot memory owned by `guest_mem`.
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        // Prefer the smallest freed slot; otherwise take the next slot past
        // the boot regions and every live extra region.
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // SAFETY: `mem` is kept in `regions` below for as long as the slot is
        // registered, so the host mapping outlives the KVM slot (it is
        // unregistered in `remove_memory_region` before being dropped).
        let res = unsafe {
            set_user_memory_region(
                self,
                slot,
                read_only,
                log_dirty_pages,
                cache,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            // Return the slot number to the free pool on failure.
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    // Hypercall forwarding is only implemented for aarch64 (SMCCC).
    fn enable_hypercalls(&self, nr: u64, count: usize) -> Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                let base = u32::try_from(nr).unwrap();
                let nr_functions = u32::try_from(count).unwrap();
                self.enable_smccc_forwarding(base, nr_functions)
            } else {
                let _ = nr;
                let _ = count;
                Err(Error::new(ENOTSUP))
            }
        }
    }

    fn msync_memory_region(&self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    // Hints the kernel to reclaim (swap out) the given range of the slot.
    fn madvise_pageout_memory_region(
        &self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_PAGEOUT)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    // Frees the backing pages of the given range of the slot.
    fn madvise_remove_memory_region(
        &self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_REMOVE)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    /// Unregisters the slot from KVM, recycles its number, and returns the
    /// mapping to the caller.
    fn remove_memory_region(&self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY: a zero-sized registration removes the slot; no host memory
        // is handed to the kernel.
        unsafe {
            set_user_memory_region(
                self,
                slot,
                false,
                false,
                MemCacheType::CacheCoherent,
                0,
                0,
                std::ptr::null_mut(),
            )?;
        }
        self.mem_slot_gaps.lock().push(Reverse(slot));
        // Unwrap is fine: presence was checked above while holding the lock.
        Ok(regions.remove(&slot).unwrap())
    }

    fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
        // Per-arch code may supply device parameters; otherwise only VFIO is
        // handled generically here.
        let mut device = if let Some(dev) = self.get_device_params_arch(kind) {
            dev
        } else {
            match kind {
                DeviceKind::Vfio => kvm_create_device {
                    type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
                    fd: 0,
                    flags: 0,
                },

                // ARM/RISC-V specific kinds not covered above.
                #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
                _ => return Err(Error::new(libc::ENXIO)),
            }
        };

        // SAFETY: the kernel writes the new device fd into `device.fd`.
        let ret = unsafe { base::ioctl_with_mut_ref(self, KVM_CREATE_DEVICE, &mut device) };
        if ret == 0 {
            Ok(
                // SAFETY: on success, `device.fd` is a valid, owned fd.
                unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) },
            )
        } else {
            errno_result()
        }
    }

    /// Fetches the dirty-page bitmap for `slot` into `dirty_log`.
    fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
        // The caller's buffer must be large enough for one bit per page.
        if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
            return Err(Error::new(EINVAL));
        }

        let mut dirty_log_kvm = kvm_dirty_log {
            slot,
            ..Default::default()
        };
        dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
        // SAFETY: the bitmap pointer/size were validated above, so the kernel
        // writes only within `dirty_log`.
        let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn register_ioevent(
        &self,
        evt: Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    fn unregister_ioevent(
        &self,
        evt: Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    // KVM delivers ioevents in-kernel, so there is nothing to do here.
    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        self.get_pvclock_arch()
    }

    fn set_pvclock(&self, state: &ClockState) -> Result<()> {
        self.set_pvclock_arch(state)
    }

    // Maps `fd` into a sub-range of an existing memory slot.
    fn add_fd_mapping(
        &self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }
}
927
impl AsRawDescriptor for KvmVm {
    // Exposes the raw VM fd for use by the `ioctl` helpers.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}
933
/// Handle that can ask a vCPU to exit by writing into its shared `kvm_run`
/// mapping, without needing the `KvmVcpu` itself.
struct KvmVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}
937
impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
    fn signal_immediate_exit(&self) {
        // SAFETY: the `Arc` keeps the vCPU's `kvm_run` mapping alive for the
        // duration of this write. NOTE(review): this assumes setting
        // `immediate_exit` from another thread while the vCPU may be running
        // is permitted by the KVM ABI — confirm against the kernel docs.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut kvm_run;
            (*run).immediate_exit = 1;
        }
    }
}
948
/// A single KVM virtual CPU.
pub struct KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    kvm: Kvm,
    #[cfg(not(target_arch = "riscv64"))]
    vm: SafeDescriptor,
    vcpu: File,
    id: usize,
    // Cached KVM_CAP_KVMCLOCK_CTRL support, probed at VM creation.
    cap_kvmclock_ctrl: bool,
    // Shared `kvm_run` mapping; also referenced by signal handles.
    run_mmap: Arc<MemoryMapping>,
}
960
impl Vcpu for KvmVcpu {
    fn id(&self) -> usize {
        self.id
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn set_immediate_exit(&self, exit: bool) {
        // SAFETY: `run_mmap` is the vCPU's `kvm_run` area, at least
        // KVM_GET_VCPU_MMAP_SIZE bytes, and lives as long as `self`.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }

    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(KvmVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    // Before suspending, tell the kernel the guest is paused so kvmclock
    // doesn't flag a soft lockup; EINVAL from the ioctl is tolerated.
    fn on_suspend(&self) -> Result<()> {
        if self.cap_kvmclock_ctrl {
            // SAFETY: this ioctl takes no arguments.
            if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL) } != 0 {
                if Error::last().errno() != libc::EINVAL {
                    return errno_result();
                }
            }
        }

        Ok(())
    }

    // Safety contract is inherited from the trait: the caller must supply a
    // `cap`/`args` combination valid per the KVM_ENABLE_CAP API.
    unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap,
            args: *args,
            ..Default::default()
        };
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Runs the vCPU until the next exit and translates the exit reason.
    #[allow(clippy::cast_ptr_alignment)]
    fn run(&self) -> Result<VcpuExit> {
        // SAFETY: KVM_RUN takes no pointer arguments; results are written
        // into the `kvm_run` mmap read below.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY: `run_mmap` is the vCPU's `kvm_run` area and outlives `run`.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };

        // Arch-specific exits (if any) take precedence.
        if let Some(vcpu_exit) = self.handle_vm_exit_arch(run) {
            return Ok(vcpu_exit);
        }

        match run.exit_reason {
            KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
            KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
            KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
            KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
            KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
            KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown(Ok(()))),
            KVM_EXIT_FAIL_ENTRY => {
                // SAFETY: exit_reason selects which union member is valid;
                // KVM_EXIT_FAIL_ENTRY means `fail_entry` is populated.
                let hardware_entry_failure_reason = unsafe {
                    run.__bindgen_anon_1
                        .fail_entry
                        .hardware_entry_failure_reason
                };
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                })
            }
            KVM_EXIT_INTR => Ok(VcpuExit::Intr),
            KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
            KVM_EXIT_SYSTEM_EVENT => {
                // SAFETY: KVM_EXIT_SYSTEM_EVENT means `system_event` is the
                // valid union member.
                let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                // SAFETY: same union member as above.
                let event_flags =
                    unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                match event_type {
                    KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
                    KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
                    KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
                    _ => {
                        error!(
                            "Unknown KVM system event {} with flags {}",
                            event_type, event_flags
                        );
                        Err(Error::new(EINVAL))
                    }
                }
            }
            r => panic!("unknown kvm exit reason: {r}"),
        }
    }

    /// Services a pending MMIO exit by invoking `handle_fn` with the access
    /// parameters. Must only be called after `run` returned `VcpuExit::Mmio`.
    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        // SAFETY: `run_mmap` is the vCPU's `kvm_run` area and outlives `run`.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        assert!(run.exit_reason == KVM_EXIT_MMIO);
        // SAFETY: the assert above guarantees `mmio` is the valid union
        // member.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let data = &mut mmio.data[..mmio.len as usize];
        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Write(data),
            })
        } else {
            // For reads, the handler fills `data`, which lives in the shared
            // `kvm_run` area the kernel reads on the next KVM_RUN.
            handle_fn(IoParams {
                address,
                operation: IoOperation::Read(data),
            })
        }
    }

    /// Services a pending port-I/O exit. Must only be called after `run`
    /// returned with `KVM_EXIT_IO`.
    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        // SAFETY: `run_mmap` is the vCPU's `kvm_run` area and outlives `run`.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        assert!(run.exit_reason == KVM_EXIT_IO);
        // SAFETY: the assert above guarantees `io` is the valid union member.
        let io = unsafe { run.__bindgen_anon_1.io };
        let address = u64::from(io.port);
        let size = usize::from(io.size);
        let count = io.count as usize;
        let data_len = count * size;
        let data_offset = io.data_offset as usize;
        // The data buffer lives inside the `kvm_run` mapping at
        // `data_offset`; make sure it fits before forming a slice over it.
        assert!(data_offset + data_len <= self.run_mmap.size());

        // SAFETY: bounds were checked just above and the mapping outlives the
        // slice.
        let buffer: &mut [u8] = unsafe {
            std::slice::from_raw_parts_mut(
                (run as *mut kvm_run as *mut u8).add(data_offset),
                data_len,
            )
        };
        // String I/O delivers `count` back-to-back elements of `size` bytes.
        let data_chunks = buffer.chunks_mut(size);

        if io.direction == KVM_EXIT_IO_IN as u8 {
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                });
            }
        } else {
            debug_assert_eq!(io.direction, KVM_EXIT_IO_OUT as u8);
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                });
            }
        }

        Ok(())
    }

    // Hypercall handling is only implemented for aarch64 (SMCCC).
    fn handle_hypercall(
        &self,
        handle_fn: &mut dyn FnMut(&mut HypercallAbi) -> anyhow::Result<()>,
    ) -> anyhow::Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                self.handle_smccc_call(handle_fn)
            } else {
                let _ = handle_fn;
                unimplemented!("KvmVcpu::handle_hypercall() not supported");
            }
        }
    }
}
1182
1183impl KvmVcpu {
1184 pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
1192 let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
1194 let ret = {
1195 unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) }
1200 };
1201 if ret < 0 {
1202 return errno_result();
1203 }
1204 Ok(state)
1205 }
1206
1207 pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
1215 let ret = {
1216 unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) }
1219 };
1220 if ret < 0 {
1221 return errno_result();
1222 }
1223 Ok(())
1224 }
1225}
1226
impl AsRawDescriptor for KvmVcpu {
    // Exposes the raw vCPU fd for use by the `ioctl` helpers.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}
1232
impl TryFrom<HypervisorCap> for KvmCap {
    type Error = Error;

    // Maps the hypervisor-agnostic capability enum onto raw KVM capabilities;
    // capabilities with no KVM equivalent yield EINVAL.
    fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
        match cap {
            HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
            HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
        }
    }
}
1249
/// Converts a crate-level `IrqRoute` into the `kvm_irq_routing_entry` layout
/// expected by `KVM_SET_GSI_ROUTING`.
///
/// `cap_msi_devid` indicates whether the kernel supports MSI device IDs; when
/// set, aarch64 MSI routes carry the source PCI address as the device ID.
fn to_kvm_irq_routing_entry(item: &IrqRoute, cap_msi_devid: bool) -> kvm_irq_routing_entry {
    match &item.source {
        IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
            gsi: item.gsi,
            type_: KVM_IRQ_ROUTING_IRQCHIP,
            u: kvm_irq_routing_entry__bindgen_ty_1 {
                irqchip: kvm_irq_routing_irqchip {
                    irqchip: chip_to_kvm_chip(*chip),
                    pin: *pin,
                },
            },
            ..Default::default()
        },
        IrqSource::Msi {
            address,
            data,
            #[cfg(target_arch = "aarch64")]
            pci_address,
        } => {
            // Device IDs are only plumbed through on aarch64; seeing the
            // capability on any other arch indicates a misconfiguration.
            let devid = if cap_msi_devid {
                #[cfg(not(target_arch = "aarch64"))]
                panic!("unexpected KVM_CAP_MSI_DEVID");
                #[cfg(target_arch = "aarch64")]
                Some(pci_address.to_u32())
            } else {
                None
            };
            kvm_irq_routing_entry {
                gsi: item.gsi,
                type_: KVM_IRQ_ROUTING_MSI,
                // The devid union member is only meaningful with this flag.
                flags: if devid.is_some() {
                    KVM_MSI_VALID_DEVID
                } else {
                    0
                },
                u: kvm_irq_routing_entry__bindgen_ty_1 {
                    msi: kvm_irq_routing_msi {
                        address_lo: *address as u32,
                        address_hi: (*address >> 32) as u32,
                        data: *data,
                        __bindgen_anon_1: kvm_irq_routing_msi__bindgen_ty_1 {
                            devid: devid.unwrap_or_default(),
                        },
                    },
                },
                ..Default::default()
            }
        }
    }
}
1303
impl From<&kvm_mp_state> for MPState {
    // Maps raw KVM MP-state constants onto the crate enum; unknown values are
    // logged and coerced to Runnable rather than failing.
    fn from(item: &kvm_mp_state) -> Self {
        match item.mp_state {
            KVM_MP_STATE_RUNNABLE => MPState::Runnable,
            KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
            KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
            KVM_MP_STATE_HALTED => MPState::Halted,
            KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
            KVM_MP_STATE_STOPPED => MPState::Stopped,
            state => {
                error!(
                    "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
                    state
                );
                MPState::Runnable
            }
        }
    }
}
1323
impl From<&MPState> for kvm_mp_state {
    // Exhaustive inverse of the conversion above; every crate state has a
    // KVM constant, so this cannot fail.
    fn from(item: &MPState) -> Self {
        kvm_mp_state {
            mp_state: match item {
                MPState::Runnable => KVM_MP_STATE_RUNNABLE,
                MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
                MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
                MPState::Halted => KVM_MP_STATE_HALTED,
                MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
                MPState::Stopped => KVM_MP_STATE_STOPPED,
            },
        }
    }
}