#[cfg(target_arch = "aarch64")]
mod aarch64;
#[cfg(target_arch = "riscv64")]
mod riscv64;
#[cfg(target_arch = "x86_64")]
mod x86_64;

mod cap;

use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryFrom;
use std::ffi::CString;
use std::fs::File;
use std::os::raw::c_ulong;
use std::os::raw::c_void;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::sync::Arc;
use std::sync::OnceLock;

#[cfg(target_arch = "aarch64")]
pub use aarch64::*;
use base::errno_result;
use base::error;
use base::ioctl;
use base::ioctl_with_mut_ref;
use base::ioctl_with_ref;
use base::ioctl_with_val;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::FromRawDescriptor;
use base::MappedRegion;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
pub use cap::KvmCap;
use data_model::vec_with_array_field;
use kvm_sys::*;
use libc::open64;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOSYS;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
#[cfg(target_arch = "riscv64")]
use riscv64::*;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

use crate::BalloonEvent;
use crate::ClockState;
use crate::Config;
use crate::Datamatch;
use crate::DeviceKind;
use crate::Hypervisor;
use crate::HypervisorCap;
use crate::HypervisorKind;
use crate::IoEventAddress;
use crate::IoOperation;
use crate::IoParams;
use crate::IrqRoute;
use crate::IrqSource;
use crate::MPState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::Vcpu;
use crate::VcpuExit;
use crate::VcpuSignalHandle;
use crate::VcpuSignalHandleInner;
use crate::Vm;
use crate::VmCap;

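/// Helper for the `KVM_SET_USER_MEMORY_REGION`/`KVM_SET_USER_MEMORY_REGION2` ioctls.
///
/// # Safety
///
/// The caller must ensure that `userspace_addr` points to a mapping of at least `memory_size`
/// bytes that remains valid for as long as the memory slot is in use, since KVM accesses it on
/// behalf of the guest.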
unsafe fn set_user_memory_region(
    kvm: &KvmVm,
    slot: MemSlot,
    read_only: bool,
    log_dirty_pages: bool,
    cache: MemCacheType,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut use_2_variant = false;
    let mut flags = 0;
    if read_only {
        flags |= KVM_MEM_READONLY;
    }
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if kvm.caps.user_noncoherent_dma && cache == MemCacheType::CacheNonCoherent {
        flags |= KVM_MEM_NON_COHERENT_DMA;
        use_2_variant = kvm.caps.user_memory_region2;
    }

    let untagged_userspace_addr = untagged_addr(userspace_addr as usize);
    let ret = if use_2_variant {
        let region2 = kvm_userspace_memory_region2 {
            slot,
            flags,
            guest_phys_addr: guest_addr,
            memory_size,
            userspace_addr: untagged_userspace_addr as u64,
            guest_memfd_offset: 0,
            guest_memfd: 0,
            ..Default::default()
        };
        ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION2, &region2)
    } else {
        let region = kvm_userspace_memory_region {
            slot,
            flags,
            guest_phys_addr: guest_addr,
            memory_size,
            userspace_addr: untagged_userspace_addr as u64,
        };
        ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION, &region)
    };

    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

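/// Masks off the tag byte in the most significant bits of a userspace address.
///
/// On aarch64, a pointer may carry a top-byte tag (TBI/MTE) that KVM does not accept in
/// `userspace_addr`; on other architectures the address is returned unchanged.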
#[inline]
fn untagged_addr(addr: usize) -> usize {
    let tag_bits_mask: u64 = if cfg!(target_arch = "aarch64") {
        0xFF00_0000_0000_0000
    } else {
        0
    };
    addr & (!tag_bits_mask as usize)
}

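/// Returns the size in bytes of the bitmap needed by `KVM_GET_DIRTY_LOG` to cover `size` bytes
/// of memory: one bit per page, rounded up to a whole number of bytes.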
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    size.div_ceil(page_size).div_ceil(8)
}

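/// A wrapper around the open descriptor for the KVM device (`/dev/kvm` by default).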
pub struct Kvm {
    kvm: SafeDescriptor,
    vcpu_mmap_size: usize,
}

impl Kvm {
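    /// Opens the KVM device at `device_path` and returns a `Kvm` object on success.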
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Safe because we pass a nul-terminated path and check the return value.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY:
        // Safe because we verified that `ret` is a valid descriptor and we own it.
        let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };

        // SAFETY:
        // Safe because we know `kvm` is a valid descriptor and we verify the return result.
        let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION) };
        if version < 0 {
            return errno_result();
        }

        // The KVM API documentation says applications should refuse to run if
        // KVM_GET_API_VERSION returns a value other than the stable version.
        if version as u32 != KVM_API_VERSION {
            error!(
                "KVM_GET_API_VERSION: expected {}, got {}",
                KVM_API_VERSION, version,
            );
            return Err(Error::new(ENOSYS));
        }

        // SAFETY:
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(&kvm, KVM_GET_VCPU_MMAP_SIZE) };
        if res <= 0 {
            return errno_result();
        }
        let vcpu_mmap_size = res as usize;

        Ok(Kvm {
            kvm,
            vcpu_mmap_size,
        })
    }

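    /// Opens `/dev/kvm` and returns a `Kvm` object on success.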
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }
}

impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}

impl Hypervisor for Kvm {
    fn try_clone(&self) -> Result<Self> {
        Ok(Kvm {
            kvm: self.kvm.try_clone()?,
            vcpu_mmap_size: self.vcpu_mmap_size,
        })
    }

    fn check_capability(&self, cap: HypervisorCap) -> bool {
        if let Ok(kvm_cap) = KvmCap::try_from(cap) {
            // SAFETY:
            // This ioctl is safe because we know this kvm descriptor is valid and we pass no
            // memory to the kernel.
            unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, kvm_cap as c_ulong) == 1 }
        } else {
            false
        }
    }
}

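/// VM-level KVM capabilities, probed once at `KvmVm` construction and cached.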
#[derive(Clone, Default)]
struct KvmVmCaps {
    kvmclock_ctrl: bool,
    user_noncoherent_dma: bool,
    user_memory_region2: bool,
    msi_devid: Arc<OnceLock<bool>>,
}

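/// A wrapper around creating and using a KVM VM.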
pub struct KvmVm {
    kvm: Kvm,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
    /// A min-heap of `MemSlot` numbers that were used and then removed and can now be re-used.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    caps: KvmVmCaps,
    force_disable_readonly_mem: bool,
}

impl KvmVm {
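    /// Constructs a new `KvmVm` using the given `Kvm` instance.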
281 pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
283 let ret = unsafe {
287 ioctl_with_val(
288 kvm,
289 KVM_CREATE_VM,
290 kvm.get_vm_type(cfg.protection_type)? as c_ulong,
291 )
292 };
293 if ret < 0 {
294 return errno_result();
295 }
296 let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
299 let mut vm = KvmVm {
300 kvm: kvm.try_clone()?,
301 vm: vm_descriptor,
302 guest_mem,
303 mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
304 mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
305 caps: Default::default(),
306 force_disable_readonly_mem: cfg.force_disable_readonly_mem,
307 };
308 vm.caps.kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
309 vm.caps.user_noncoherent_dma = vm.check_raw_capability(KvmCap::MemNoncoherentDma);
310 vm.caps.user_memory_region2 = vm.check_raw_capability(KvmCap::UserMemory2);
311
312 vm.init_arch(&cfg)?;
313
314 for region in vm.guest_mem.regions() {
315 unsafe {
318 set_user_memory_region(
319 &vm,
320 region.index as MemSlot,
321 false,
322 false,
323 MemCacheType::CacheCoherent,
324 region.guest_addr.offset(),
325 region.size as u64,
326 region.host_addr as *mut u8,
327 )
328 }?;
329 }
330
331 Ok(vm)
332 }
333
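    /// Creates a `KvmVcpu` with the given vCPU `id` for this VM.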
    pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU, c_ulong::try_from(id).unwrap()) };
        if fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu fd now in case a subsequent `?` returns early. This is safe because we
        // verified the value of the fd and we own it.
        let vcpu = unsafe { File::from_raw_descriptor(fd) };

        // The vCPU mmap area holds the `kvm_run` struct; its size was reported by
        // KVM_GET_VCPU_MMAP_SIZE when the `Kvm` object was created.
        let run_mmap = MemoryMappingBuilder::new(self.kvm.vcpu_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            cap_kvmclock_ctrl: self.caps.kvmclock_ctrl,
            run_mmap: Arc::new(run_mmap),
        })
    }

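    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation for the `KVM_CREATE_IRQCHIP` ioctl.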
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

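    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.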
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY:
        // Safe because the kernel only reads the `kvm_irq_level` struct, which we fully
        // initialized, and we check the return value.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

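    /// Registers an event that will, when signalled, trigger the `gsi` irq. If `resample_evt`
    /// is provided, it will be signalled when the irqchip is resampled.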
    pub fn register_irqfd(
        &self,
        gsi: u32,
        evt: &Event,
        resample_evt: Option<&Event>,
    ) -> Result<()> {
        let mut irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };

        if let Some(r_evt) = resample_evt {
            irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
        }

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

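    /// Unregisters an event that was previously registered with `register_irqfd`.
    ///
    /// The `gsi` and `evt` pair must match the arguments passed to `register_irqfd`.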
    pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

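    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.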
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        let cap_msi_devid = *self
            .caps
            .msi_devid
            .get_or_init(|| self.check_raw_capability(KvmCap::MsiDevid));

        // SAFETY:
        // Safe because we ensured there is enough space in `irq_routing` to hold the requested
        // number of routing entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            *irq_route = to_kvm_irq_routing_entry(route, cap_msi_devid);
        }

        // SAFETY:
        // Safe because the kernel only reads the `kvm_irq_routing` header plus the `nr` trailing
        // entries initialized above, and we check the return value.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoEventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoEventAddress::Pio(p) => p,
                IoEventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

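    /// Checks whether a particular KVM-specific capability is available for this VM.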
    pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
        // SAFETY:
        // Safe because we know that our file is a KVM fd, and if the cap is invalid, KVM treats
        // it as an unavailable extension and returns 0.
        let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, capability as c_ulong) };
        match capability {
            #[cfg(target_arch = "x86_64")]
            KvmCap::BusLockDetect => {
                if ret > 0 {
                    ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
                } else {
                    false
                }
            }
            _ => ret == 1,
        }
    }

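    /// Enables a KVM capability on this VM.
    ///
    /// # Safety
    ///
    /// This function is marked as unsafe because `args` may be interpreted as pointers for some
    /// capabilities. The caller must ensure that any pointers passed in the `args` array are
    /// allocated as the kernel expects, and that mutable pointers are not in use elsewhere.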
    #[allow(dead_code)]
    unsafe fn enable_raw_capability(
        &self,
        capability: KvmCap,
        flags: u32,
        args: &[u64; 4],
    ) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap: capability as u32,
            args: *args,
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly its size,
        // and because the caller guarantees that `args` is allocated as the kernel expects.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        let res = if self.guest_mem.use_punchhole_locked() {
            self.guest_mem.punch_hole_range(guest_address, size)
        } else {
            self.guest_mem.remove_range(guest_address, size)
        };
        match res {
            Ok(_) => Ok(()),
            Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
        // No-op; when the guest accesses the pages again, they will be provided on demand.
        Ok(())
    }
}

impl Vm for KvmVm {
    fn try_clone(&self) -> Result<Self> {
        Ok(KvmVm {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            caps: self.caps.clone(),
            force_disable_readonly_mem: self.force_disable_readonly_mem,
        })
    }

    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        self.vm.try_clone()
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Kvm
    }

    fn check_capability(&self, c: VmCap) -> bool {
        if let Some(val) = self.check_capability_arch(c) {
            return val;
        }
        match c {
            #[cfg(target_arch = "aarch64")]
            VmCap::ArmPmuV3 => self.check_raw_capability(KvmCap::ArmPmuV3),
            VmCap::DirtyLog => true,
            VmCap::PvClock => false,
            VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
            VmCap::EarlyInitCpuid => false,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
            VmCap::ReadOnlyMemoryRegion => {
                !self.force_disable_readonly_mem && self.check_raw_capability(KvmCap::ReadonlyMem)
            }
            VmCap::MemNoncoherentDma => {
                cfg!(feature = "noncoherent-dma")
                    && self.check_raw_capability(KvmCap::MemNoncoherentDma)
            }
            #[cfg(target_arch = "aarch64")]
            VmCap::Sve => self.check_raw_capability(KvmCap::Sve),
        }
    }

    fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
        match c {
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => {
                let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
                Ok(
                    // SAFETY:
                    // Safe because `args` contains no pointers and outlives the call.
                    unsafe {
                        self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
                    },
                )
            }
            _ => Ok(false),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        self.kvm.get_guest_phys_addr_bits()
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        cache: MemCacheType,
    ) -> Result<MemSlot> {
        let pgsz = pagesize() as u64;
        // KVM requires the memory region size to be page aligned. Rounding mem.size() up is safe
        // because the underlying mmap is itself rounded up to page granularity.
        let size = (mem.size() as u64).next_multiple_of(pgsz);
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // SAFETY:
        // Safe because we checked that the given guest address is valid and has no overlaps, and
        // the memory mapped by `mem` stays valid for as long as it is kept in `regions`.
        let res = unsafe {
            set_user_memory_region(
                self,
                slot,
                read_only,
                log_dirty_pages,
                cache,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn madvise_pageout_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_PAGEOUT)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn madvise_remove_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_REMOVE)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY:
        // Safe because the slot was checked against the list of memory slots.
        unsafe {
            set_user_memory_region(
                self,
                slot,
                false,
                false,
                MemCacheType::CacheCoherent,
                0,
                0,
                std::ptr::null_mut(),
            )?;
        }
        self.mem_slot_gaps.lock().push(Reverse(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
        let mut device = if let Some(dev) = self.get_device_params_arch(kind) {
            dev
        } else {
            match kind {
                DeviceKind::Vfio => kvm_create_device {
                    type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
                    fd: 0,
                    flags: 0,
                },

                // ARM and RISC-V have additional DeviceKinds, so they need a catch-all pattern.
                #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
                _ => return Err(Error::new(libc::ENXIO)),
            }
        };

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { base::ioctl_with_mut_ref(self, KVM_CREATE_DEVICE, &mut device) };
        if ret == 0 {
            // SAFETY:
            // Safe because we verified that the fd the kernel returned is valid and we own it.
            Ok(unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) })
        } else {
            errno_result()
        }
    }

    fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
        // Ensure that there are as many bits in dirty_log as there are pages in the mmap.
        if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
            return Err(Error::new(EINVAL));
        }

        let mut dirty_log_kvm = kvm_dirty_log {
            slot,
            ..Default::default()
        };
        dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_mut_ptr() as *mut c_void;
        // SAFETY:
        // Safe because the `dirty_bitmap` pointer assigned above comes from a slice that we
        // checked is large enough to hold the entire log.
        let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        // KVM delivers IO events in-kernel via ioeventfds, so this is a no-op.
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        self.get_pvclock_arch()
    }

    fn set_pvclock(&self, state: &ClockState) -> Result<()> {
        self.set_pvclock_arch(state)
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }
}


impl AsRawDescriptor for KvmVm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

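/// A handle that can request an immediate vCPU exit without borrowing the full `KvmVcpu`.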
struct KvmVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}

impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
    fn signal_immediate_exit(&self) {
        // SAFETY:
        // Safe because we know the mmap is large enough to hold a valid `kvm_run` struct (the
        // kernel told us its size), and the `Arc` keeps the mapping alive.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut kvm_run;
            (*run).immediate_exit = 1;
        }
    }
}

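/// A wrapper around creating and using a KVM Vcpu.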
pub struct KvmVcpu {
    kvm: Kvm,
    vm: SafeDescriptor,
    vcpu: File,
    id: usize,
    cap_kvmclock_ctrl: bool,
    run_mmap: Arc<MemoryMapping>,
}

impl Vcpu for KvmVcpu {
    fn try_clone(&self) -> Result<Self> {
        let vm = self.vm.try_clone()?;
        let vcpu = self.vcpu.try_clone()?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm,
            vcpu,
            cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
            id: self.id,
            run_mmap: self.run_mmap.clone(),
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    fn id(&self) -> usize {
        self.id
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn set_immediate_exit(&self, exit: bool) {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the `kvm_run` struct because the
        // kernel told us how large it was. The pointer is page aligned, so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }

    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(KvmVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    fn on_suspend(&self) -> Result<()> {
        // On KVM implementations that use a paravirtualized clock (e.g. x86), a flag must be set
        // to indicate to the guest kernel that a vCPU was suspended. The guest kernel will use
        // this flag to prevent the soft lockup detection from triggering when this vCPU resumes,
        // which could happen days later in realtime.
        if self.cap_kvmclock_ctrl {
            // SAFETY:
            // The ioctl is safe because it does not read or write memory in this process.
            if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL) } != 0 {
                // Even if the host kernel supports the capability, it may not be configured by
                // the guest kernel, in which case this ioctl returns EINVAL; ignore that case.
                if Error::last().errno() != libc::EINVAL {
                    return errno_result();
                }
            }
        }

        Ok(())
    }

    unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap,
            args: *args,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly its size,
        // and because the caller guarantees that `args` is allocated as the kernel expects.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    // The run mmap is page aligned, so casting its pointer to `*mut kvm_run` is well defined,
    // hence the clippy allow attribute.
    #[allow(clippy::cast_ptr_alignment)]
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY:
        // Safe because we know we mapped enough memory to hold the `kvm_run` struct because the
        // kernel told us how large it was.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };

        // Check for architecture-specific exit reasons first.
        if let Some(vcpu_exit) = self.handle_vm_exit_arch(run) {
            return Ok(vcpu_exit);
        }

        match run.exit_reason {
            KVM_EXIT_IO => Ok(VcpuExit::Io),
            KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
            KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
            KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
            KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
            KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
            KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown(Ok(()))),
            KVM_EXIT_FAIL_ENTRY => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hardware_entry_failure_reason = unsafe {
                    run.__bindgen_anon_1
                        .fail_entry
                        .hardware_entry_failure_reason
                };
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                })
            }
            KVM_EXIT_INTR => Ok(VcpuExit::Intr),
            KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
            KVM_EXIT_SYSTEM_EVENT => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let event_flags =
                    unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                match event_type {
                    KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
                    KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
                    KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
                    _ => {
                        error!(
                            "Unknown KVM system event {} with flags {}",
                            event_type, event_flags
                        );
                        Err(Error::new(EINVAL))
                    }
                }
            }
            r => panic!("unknown kvm exit reason: {r}"),
        }
    }

    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the `kvm_run` struct because the
        // kernel told us how large it was. The pointer is page aligned, so casting to a different
        // type is well defined.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Verify that the handler is called in the right context.
        assert!(run.exit_reason == KVM_EXIT_MMIO);
        // SAFETY:
        // Safe because the exit_reason (which comes from the kernel) told us which
        // union field to use.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let data = &mut mmio.data[..mmio.len as usize];
        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Write(data),
            })
        } else {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Read(data),
            })
        }
    }

1132
1133 fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
1134 let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
1138 assert!(run.exit_reason == KVM_EXIT_IO);
1140 let io = unsafe { run.__bindgen_anon_1.io };
1144 let address = u64::from(io.port);
1145 let size = usize::from(io.size);
1146 let count = io.count as usize;
1147 let data_len = count * size;
1148 let data_offset = io.data_offset as usize;
1149 assert!(data_offset + data_len <= self.run_mmap.size());
1150
1151 let buffer: &mut [u8] = unsafe {
1155 std::slice::from_raw_parts_mut(
1156 (run as *mut kvm_run as *mut u8).add(data_offset),
1157 data_len,
1158 )
1159 };
1160 let data_chunks = buffer.chunks_mut(size);
1161
1162 if io.direction == KVM_EXIT_IO_IN as u8 {
1163 for data in data_chunks {
1164 handle_fn(IoParams {
1165 address,
1166 operation: IoOperation::Read(data),
1167 });
1168 }
1169 } else {
1170 debug_assert_eq!(io.direction, KVM_EXIT_IO_OUT as u8);
1171 for data in data_chunks {
1172 handle_fn(IoParams {
1173 address,
1174 operation: IoOperation::Write(data),
1175 });
1176 }
1177 }
1178
1179 Ok(())
1180 }
1181}

impl KvmVcpu {
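    /// Gets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after a call to
    /// `Vm::create_irq_chip`.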
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // SAFETY:
        // Safe because `kvm_mp_state` is a plain C struct for which all-zero bytes are a valid
        // value.
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        let ret = {
            // SAFETY:
            // Safe because we know that our file is a VCPU fd, we know the kernel will only write
            // the correct amount of memory to our pointer, and we verify the return result.
            unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }

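    /// Sets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after a call to
    /// `Vm::create_irq_chip`.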
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        let ret = {
            // SAFETY:
            // Safe because we know that our file is a VCPU fd, we know the kernel will only read
            // the correct amount of memory from our pointer, and we verify the return result.
            unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}


impl AsRawDescriptor for KvmVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

impl TryFrom<HypervisorCap> for KvmCap {
    type Error = Error;

    fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
        match cap {
            HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
            HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
        }
    }
}

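// Translates a generic `IrqRoute` into the KVM-specific routing entry representation.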
fn to_kvm_irq_routing_entry(item: &IrqRoute, cap_msi_devid: bool) -> kvm_irq_routing_entry {
    match &item.source {
        IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
            gsi: item.gsi,
            type_: KVM_IRQ_ROUTING_IRQCHIP,
            u: kvm_irq_routing_entry__bindgen_ty_1 {
                irqchip: kvm_irq_routing_irqchip {
                    irqchip: chip_to_kvm_chip(*chip),
                    pin: *pin,
                },
            },
            ..Default::default()
        },
        IrqSource::Msi {
            address,
            data,
            #[cfg(target_arch = "aarch64")]
            pci_address,
        } => {
            // A device ID is only supplied when the host advertises KVM_CAP_MSI_DEVID, which is
            // only expected on aarch64 here.
            let devid = if cap_msi_devid {
                #[cfg(not(target_arch = "aarch64"))]
                panic!("unexpected KVM_CAP_MSI_DEVID");
                #[cfg(target_arch = "aarch64")]
                Some(pci_address.to_u32())
            } else {
                None
            };
            kvm_irq_routing_entry {
                gsi: item.gsi,
                type_: KVM_IRQ_ROUTING_MSI,
                flags: if devid.is_some() {
                    KVM_MSI_VALID_DEVID
                } else {
                    0
                },
                u: kvm_irq_routing_entry__bindgen_ty_1 {
                    msi: kvm_irq_routing_msi {
                        address_lo: *address as u32,
                        address_hi: (*address >> 32) as u32,
                        data: *data,
                        __bindgen_anon_1: kvm_irq_routing_msi__bindgen_ty_1 {
                            devid: devid.unwrap_or_default(),
                        },
                    },
                },
                ..Default::default()
            }
        }
    }
}


impl From<&kvm_mp_state> for MPState {
    fn from(item: &kvm_mp_state) -> Self {
        match item.mp_state {
            KVM_MP_STATE_RUNNABLE => MPState::Runnable,
            KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
            KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
            KVM_MP_STATE_HALTED => MPState::Halted,
            KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
            KVM_MP_STATE_STOPPED => MPState::Stopped,
            state => {
                error!(
                    "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
                    state
                );
                MPState::Runnable
            }
        }
    }
}


impl From<&MPState> for kvm_mp_state {
    fn from(item: &MPState) -> Self {
        kvm_mp_state {
            mp_state: match item {
                MPState::Runnable => KVM_MP_STATE_RUNNABLE,
                MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
                MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
                MPState::Halted => KVM_MP_STATE_HALTED,
                MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
                MPState::Stopped => KVM_MP_STATE_STOPPED,
            },
        }
    }
}