#[cfg(target_arch = "aarch64")]
mod aarch64;
#[cfg(target_arch = "riscv64")]
mod riscv64;
#[cfg(target_arch = "x86_64")]
mod x86_64;

mod cap;

use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryFrom;
use std::ffi::CString;
use std::fs::File;
use std::os::raw::c_ulong;
use std::os::raw::c_void;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::sync::Arc;
use std::sync::OnceLock;

#[cfg(target_arch = "aarch64")]
pub use aarch64::*;
use base::errno_result;
use base::error;
use base::ioctl;
use base::ioctl_with_mut_ref;
use base::ioctl_with_ref;
use base::ioctl_with_val;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::FromRawDescriptor;
use base::MappedRegion;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
pub use cap::KvmCap;
use cfg_if::cfg_if;
use data_model::vec_with_array_field;
use kvm_sys::*;
use libc::open64;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOSYS;
#[cfg(not(target_arch = "aarch64"))]
use libc::ENOTSUP;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
#[cfg(target_arch = "riscv64")]
use riscv64::*;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

use crate::BalloonEvent;
use crate::ClockState;
use crate::Config;
use crate::Datamatch;
use crate::DeviceKind;
use crate::HypercallAbi;
use crate::Hypervisor;
use crate::HypervisorCap;
use crate::HypervisorKind;
use crate::IoEventAddress;
use crate::IoOperation;
use crate::IoParams;
use crate::IrqRoute;
use crate::IrqSource;
use crate::MPState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::Vcpu;
use crate::VcpuExit;
use crate::VcpuSignalHandle;
use crate::VcpuSignalHandleInner;
use crate::Vm;
use crate::VmCap;

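/// Helper that issues the `KVM_SET_USER_MEMORY_REGION` ioctl (or its
/// `KVM_SET_USER_MEMORY_REGION2` variant when noncoherent DMA is requested and supported) to
/// install, replace, or delete the memory mapping for `slot`.
///
/// # Safety
///
/// The caller must guarantee that `userspace_addr` points to `memory_size` bytes that remain
/// valid for as long as KVM may access them, and that the region does not overlap any other
/// active slot.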
unsafe fn set_user_memory_region(
    kvm: &KvmVm,
    slot: MemSlot,
    read_only: bool,
    log_dirty_pages: bool,
    cache: MemCacheType,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut use_2_variant = false;
    let mut flags = 0;
    if read_only {
        flags |= KVM_MEM_READONLY;
    }
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if kvm.caps.user_noncoherent_dma && cache == MemCacheType::CacheNonCoherent {
        flags |= KVM_MEM_NON_COHERENT_DMA;
        // Use the region2 variant of the ioctl for this flag when the kernel supports it.
        use_2_variant = kvm.caps.user_memory_region2;
    }

    let untagged_userspace_addr = untagged_addr(userspace_addr as usize);
    let ret = if use_2_variant {
        let region2 = kvm_userspace_memory_region2 {
            slot,
            flags,
            guest_phys_addr: guest_addr,
            memory_size,
            userspace_addr: untagged_userspace_addr as u64,
            guest_memfd_offset: 0,
            guest_memfd: 0,
            ..Default::default()
        };
        // SAFETY: Safe because the caller guarantees the memory region described above is valid.
        ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION2, &region2)
    } else {
        let region = kvm_userspace_memory_region {
            slot,
            flags,
            guest_phys_addr: guest_addr,
            memory_size,
            userspace_addr: untagged_userspace_addr as u64,
        };
        // SAFETY: Safe because the caller guarantees the memory region described above is valid.
        ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION, &region)
    };

    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

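/// Strips the pointer tag bits from `addr` on architectures with tagged userspace pointers
/// (top-byte ignore on aarch64); on other architectures the address is returned unchanged.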
#[inline]
fn untagged_addr(addr: usize) -> usize {
    let tag_bits_mask: u64 = if cfg!(target_arch = "aarch64") {
        0xFF00000000000000
    } else {
        0
    };
    addr & !tag_bits_mask as usize
}

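/// Returns the size in bytes of the dirty log bitmap needed for a memory region of `size` bytes:
/// one bit per page, rounded up to whole bytes.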
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    size.div_ceil(page_size).div_ceil(8)
}

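/// A wrapper around opening and using `/dev/kvm`.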
pub struct Kvm {
    kvm: SafeDescriptor,
    vcpu_mmap_size: usize,
}

impl Kvm {
    /// Opens the KVM device at `device_path` and returns a `Kvm` object on success.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY: Safe because `c_path` is a valid C string and we verify the return result.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY: Safe because we verified `ret` is a valid, newly opened descriptor we own.
        let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };

        // SAFETY: Safe because we know `kvm` is a real KVM fd and this ioctl takes no arguments.
        let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION) };
        if version < 0 {
            return errno_result();
        }

        // Refuse to run against any KVM API version other than the stable one we were built for.
        if version as u32 != KVM_API_VERSION {
            error!(
                "KVM_GET_API_VERSION: expected {}, got {}",
                KVM_API_VERSION, version,
            );
            return Err(Error::new(ENOSYS));
        }

        // SAFETY: Safe because we know `kvm` is a real KVM fd and this ioctl takes no arguments.
        let res = unsafe { ioctl(&kvm, KVM_GET_VCPU_MMAP_SIZE) };
        if res <= 0 {
            return errno_result();
        }
        let vcpu_mmap_size = res as usize;

        Ok(Kvm {
            kvm,
            vcpu_mmap_size,
        })
    }

    /// Opens `/dev/kvm` and returns a `Kvm` object on success.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }
}

impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}

impl Hypervisor for Kvm {
    fn try_clone(&self) -> Result<Self> {
        Ok(Kvm {
            kvm: self.kvm.try_clone()?,
            vcpu_mmap_size: self.vcpu_mmap_size,
        })
    }

    fn check_capability(&self, cap: HypervisorCap) -> bool {
        if let Ok(kvm_cap) = KvmCap::try_from(cap) {
            // SAFETY: Safe because we know that our file is a KVM fd and we verify the return
            // result.
            unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, kvm_cap as c_ulong) == 1 }
        } else {
            false
        }
    }
}

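/// Cached results of KVM capability checks for a VM.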
#[derive(Clone, Default)]
struct KvmVmCaps {
    kvmclock_ctrl: bool,
    user_noncoherent_dma: bool,
    user_memory_region2: bool,
    /// Checked lazily on first use rather than at VM creation time.
    msi_devid: Arc<OnceLock<bool>>,
}

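/// A wrapper around creating and using a KVM VM.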
pub struct KvmVm {
    kvm: Kvm,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
    /// Min-heap (via `Reverse`) of memory slots freed by `remove_memory_region`; these are reused
    /// before new slot numbers are allocated.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    caps: KvmVmCaps,
    force_disable_readonly_mem: bool,
}

impl KvmVm {
    /// Constructs a new `KvmVm` using the given `Kvm` instance.
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
        // SAFETY: Safe because we know `kvm` is a real KVM fd and we verify the return result.
        let ret = unsafe {
            ioctl_with_val(
                kvm,
                KVM_CREATE_VM,
                kvm.get_vm_type(cfg.protection_type)? as c_ulong,
            )
        };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY: Safe because we verified the value of `ret` and we own the fd.
        let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
        let mut vm = KvmVm {
            kvm: kvm.try_clone()?,
            vm: vm_descriptor,
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            caps: Default::default(),
            force_disable_readonly_mem: cfg.force_disable_readonly_mem,
        };
        vm.caps.kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
        vm.caps.user_noncoherent_dma = vm.check_raw_capability(KvmCap::MemNoncoherentDma);
        vm.caps.user_memory_region2 = vm.check_raw_capability(KvmCap::UserMemory2);

        vm.init_arch(&cfg)?;

        for region in vm.guest_mem.regions() {
            // SAFETY: Safe because the guest regions are guaranteed not to overlap.
            unsafe {
                set_user_memory_region(
                    &vm,
                    region.index as MemSlot,
                    false,
                    false,
                    MemCacheType::CacheCoherent,
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }?;
        }

        Ok(vm)
    }

    /// Creates a KVM vCPU with the specified `id`.
    pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
        // SAFETY: Safe because we know that our file is a VM fd and we verify the return result.
        let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU, c_ulong::try_from(id).unwrap()) };
        if fd < 0 {
            return errno_result();
        }

        // SAFETY: Safe because we verified the value of `fd` and we own the fd.
        let vcpu = unsafe { File::from_raw_descriptor(fd) };

        // The `kvm_run` mapping is shared (via `Arc`) with any signal handles cloned from this
        // vCPU, so it stays mapped for as long as either may dereference it.
        let run_mmap = MemoryMappingBuilder::new(self.kvm.vcpu_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            cap_kvmclock_ctrl: self.caps.kvmclock_ctrl,
            run_mmap: Arc::new(run_mmap),
        })
    }

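    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.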
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY: Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

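    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.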
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only
        // read the correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

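    /// Registers an event that, when signaled, will trigger the `gsi` irq. If `resample_evt` is
    /// provided, it will be signaled when the irqchip is resampled.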
    pub fn register_irqfd(
        &self,
        gsi: u32,
        evt: &Event,
        resample_evt: Option<&Event>,
    ) -> Result<()> {
        let mut irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };

        if let Some(r_evt) = resample_evt {
            irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
        }

        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only
        // read the correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

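    /// Unregisters an event that was previously registered with `register_irqfd`.
    ///
    /// The `evt` and `gsi` pair must be the same as the ones passed into `register_irqfd`.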
    pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only
        // read the correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

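    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.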
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        let cap_msi_devid = *self
            .caps
            .msi_devid
            .get_or_init(|| self.check_raw_capability(KvmCap::MsiDevid));

        // SAFETY: Safe because `irq_routing` was allocated with enough trailing space for
        // `routes.len()` entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            *irq_route = to_kvm_irq_routing_entry(route, cap_msi_devid);
        }

        // SAFETY: Safe because we know that our file is a VM fd, the kernel only reads
        // `irq_routing` and its trailing entries, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoEventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoEventAddress::Pio(p) => p,
                IoEventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only
        // read the correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

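    /// Checks whether a particular KVM-specific capability is available for this VM.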
    pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
        // SAFETY: Safe because we know that our file is a KVM fd; if the capability is unknown,
        // the kernel simply reports it as unavailable.
        let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, capability as c_ulong) };
        match capability {
            #[cfg(target_arch = "x86_64")]
            KvmCap::BusLockDetect => {
                if ret > 0 {
                    ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
                } else {
                    false
                }
            }
            _ => ret == 1,
        }
    }

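    /// Enables the specified capability on this VM.
    ///
    /// See the documentation on the KVM_ENABLE_CAP ioctl.
    ///
    /// # Safety
    ///
    /// `args` may be interpreted as pointers for some capabilities, so the caller must ensure
    /// that any pointers placed in `args` remain valid and correctly sized for the requested
    /// capability.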
    #[allow(dead_code)]
    unsafe fn enable_raw_capability(
        &self,
        capability: KvmCap,
        flags: u32,
        args: &[u64; 4],
    ) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap: capability as u32,
            args: *args,
            flags,
            ..Default::default()
        };
        // SAFETY: Safe because we allocated the struct, the kernel reads exactly its size, and
        // the caller upholds the safety requirements for the contents of `args`.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        let result = if self.guest_mem.use_punchhole_locked() {
            self.guest_mem.punch_hole_range(guest_address, size)
        } else {
            self.guest_mem.remove_range(guest_address, size)
        };
        match result {
            Ok(_) => Ok(()),
            Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
        // No-op; when the guest accesses the pages again, the kernel will provide them.
        Ok(())
    }
}

impl Vm for KvmVm {
    fn try_clone(&self) -> Result<Self> {
        Ok(KvmVm {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            caps: self.caps.clone(),
            force_disable_readonly_mem: self.force_disable_readonly_mem,
        })
    }

    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        self.vm.try_clone()
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Kvm
    }

    fn check_capability(&self, c: VmCap) -> bool {
        if let Some(val) = self.check_capability_arch(c) {
            return val;
        }
        match c {
            #[cfg(target_arch = "aarch64")]
            VmCap::ArmPmuV3 => self.check_raw_capability(KvmCap::ArmPmuV3),
            VmCap::DirtyLog => true,
            VmCap::PvClock => false,
            VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
            VmCap::EarlyInitCpuid => false,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
            VmCap::ReadOnlyMemoryRegion => {
                !self.force_disable_readonly_mem && self.check_raw_capability(KvmCap::ReadonlyMem)
            }
            VmCap::MemNoncoherentDma => {
                cfg!(feature = "noncoherent-dma")
                    && self.check_raw_capability(KvmCap::MemNoncoherentDma)
            }
            #[cfg(target_arch = "aarch64")]
            VmCap::Sve => self.check_raw_capability(KvmCap::Sve),
        }
    }

    fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
        match c {
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => {
                let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
                Ok(
                    // SAFETY: Safe because `args` contains no pointers for this capability and we
                    // check the result.
                    unsafe {
                        self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
                    },
                )
            }
            _ => Ok(false),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        self.kvm.get_guest_phys_addr_bits()
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        cache: MemCacheType,
    ) -> Result<MemSlot> {
        let pgsz = pagesize() as u64;
        // KVM requires the memory region size to be a multiple of the page size, so round up.
        let size = (mem.size() as u64).next_multiple_of(pgsz);
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // SAFETY: Safe because we checked that the given guest address is valid and does not
        // overlap an existing region, and `mem` is kept alive in `mem_regions` for the lifetime
        // of the slot.
        let res = unsafe {
            set_user_memory_region(
                self,
                slot,
                read_only,
                log_dirty_pages,
                cache,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    fn enable_hypercalls(&mut self, nr: u64, count: usize) -> Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                let base = u32::try_from(nr).unwrap();
                let nr_functions = u32::try_from(count).unwrap();
                self.enable_smccc_forwarding(base, nr_functions)
            } else {
                let _ = nr;
                let _ = count;
                Err(Error::new(ENOTSUP))
            }
        }
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn madvise_pageout_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_PAGEOUT)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn madvise_remove_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_REMOVE)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY: Safe because the slot is checked against the list of memory slots.
        unsafe {
            set_user_memory_region(
                self,
                slot,
                false,
                false,
                MemCacheType::CacheCoherent,
                0,
                0,
                std::ptr::null_mut(),
            )?;
        }
        self.mem_slot_gaps.lock().push(Reverse(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
        let mut device = if let Some(dev) = self.get_device_params_arch(kind) {
            dev
        } else {
            match kind {
                DeviceKind::Vfio => kvm_create_device {
                    type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
                    fd: 0,
                    flags: 0,
                },

                // ARM and RISC-V have additional DeviceKinds, so they need the catch-all pattern.
                #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
                _ => return Err(Error::new(libc::ENXIO)),
            }
        };

        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only
        // write the correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_CREATE_DEVICE, &mut device) };
        if ret == 0 {
            // SAFETY: Safe because we verified the value of `ret` and we take ownership of the fd.
            Ok(unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) })
        } else {
            errno_result()
        }
    }

    fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
        // Ensure that there are as many bytes in dirty_log as there are pages in the mmap.
        if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
            return Err(Error::new(EINVAL));
        }

        let mut dirty_log_kvm = kvm_dirty_log {
            slot,
            ..Default::default()
        };
        dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
        // SAFETY: Safe because the `dirty_bitmap` pointer assigned above comes from a slice that
        // we checked is large enough to hold the entire log.
        let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        // KVM delivers IO events in-kernel with ioeventfds, so this is a no-op.
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        self.get_pvclock_arch()
    }

    fn set_pvclock(&self, state: &ClockState) -> Result<()> {
        self.set_pvclock_arch(state)
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }
}

impl AsRawDescriptor for KvmVm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

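/// A signal handle that requests an immediate vCPU exit by setting the `immediate_exit` flag in
/// the shared `kvm_run` mapping.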
struct KvmVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}

impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
    fn signal_immediate_exit(&self) {
        // SAFETY: Safe because the mmap, which this handle keeps alive via `Arc`, is known to
        // point at a valid `kvm_run` struct of sufficient size.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut kvm_run;
            (*run).immediate_exit = 1;
        }
    }
}

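/// A wrapper around using a KVM vCPU.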
pub struct KvmVcpu {
    kvm: Kvm,
    vm: SafeDescriptor,
    vcpu: File,
    id: usize,
    cap_kvmclock_ctrl: bool,
    run_mmap: Arc<MemoryMapping>,
}

impl Vcpu for KvmVcpu {
    fn try_clone(&self) -> Result<Self> {
        let vm = self.vm.try_clone()?;
        let vcpu = self.vcpu.try_clone()?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm,
            vcpu,
            cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
            id: self.id,
            run_mmap: self.run_mmap.clone(),
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    fn id(&self) -> usize {
        self.id
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn set_immediate_exit(&self, exit: bool) {
        // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct (the
        // kernel told us how large it was). The pointer is page aligned, so casting to a
        // different type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }

    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(KvmVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    fn on_suspend(&self) -> Result<()> {
        // KVM_KVMCLOCK_CTRL marks the vCPU as paused so the guest can account for the pause and
        // avoid spurious soft-lockup watchdog reports after it resumes. Only issue the ioctl when
        // the host advertises the capability.
        if self.cap_kvmclock_ctrl {
            // SAFETY: The ioctl is safe because it does not read or write memory in this process.
            if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL) } != 0 {
                // Even if the host kernel supports the capability, it may not be configured by
                // the guest - for example, when the guest kernel offlines a CPU.
                if Error::last().errno() != libc::EINVAL {
                    return errno_result();
                }
            }
        }

        Ok(())
    }

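    /// Enables the specified raw capability on this vCPU.
    ///
    /// See the documentation on the KVM_ENABLE_CAP ioctl.
    ///
    /// # Safety
    ///
    /// `args` may be interpreted as pointers for some capabilities, so the caller must ensure
    /// that any pointers placed in `args` remain valid and correctly sized for the requested
    /// capability.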
    unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap,
            args: *args,
            ..Default::default()
        };
        // SAFETY: Safe because we allocated the struct, the kernel reads exactly its size, and
        // the caller upholds the safety requirements for the contents of `args`.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY: Safe because we know that our file is a vCPU fd and we verify the return
        // result.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct (the
        // kernel told us how large it was).
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };

        // Give the architecture-specific handler the first chance to interpret the exit.
        if let Some(vcpu_exit) = self.handle_vm_exit_arch(run) {
            return Ok(vcpu_exit);
        }

        match run.exit_reason {
            KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
            KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
            KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
            KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
            KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
            KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown(Ok(()))),
            KVM_EXIT_FAIL_ENTRY => {
                // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                // which union field to use.
                let hardware_entry_failure_reason = unsafe {
                    run.__bindgen_anon_1
                        .fail_entry
                        .hardware_entry_failure_reason
                };
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                })
            }
            KVM_EXIT_INTR => Ok(VcpuExit::Intr),
            KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
            KVM_EXIT_SYSTEM_EVENT => {
                // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                // which union field to use.
                let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                // which union field to use.
                let event_flags =
                    unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                match event_type {
                    KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
                    KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
                    KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
                    _ => {
                        error!(
                            "Unknown KVM system event {} with flags {}",
                            event_type, event_flags
                        );
                        Err(Error::new(EINVAL))
                    }
                }
            }
            r => panic!("unknown kvm exit reason: {r}"),
        }
    }

    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct (the
        // kernel told us how large it was). The pointer is page aligned, so casting to a
        // different type is well defined.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Verify that the handler is only called in the MMIO-exit context.
        assert!(run.exit_reason == KVM_EXIT_MMIO);
        // SAFETY: Safe because the exit_reason (which comes from the kernel) told us which union
        // field to use.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let data = &mut mmio.data[..mmio.len as usize];
        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Write(data),
            })
        } else {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Read(data),
            })
        }
    }

    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct (the
        // kernel told us how large it was). The pointer is page aligned, so casting to a
        // different type is well defined.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Verify that the handler is only called in the port-IO-exit context.
        assert!(run.exit_reason == KVM_EXIT_IO);
        // SAFETY: Safe because the exit_reason (which comes from the kernel) told us which union
        // field to use.
        let io = unsafe { run.__bindgen_anon_1.io };
        let address = u64::from(io.port);
        let size = usize::from(io.size);
        let count = io.count as usize;
        let data_len = count * size;
        let data_offset = io.data_offset as usize;
        assert!(data_offset + data_len <= self.run_mmap.size());

        // SAFETY: Safe because the data buffer described by `data_offset` and `data_len` lies
        // within the `kvm_run` mapping, which we checked above.
        let buffer: &mut [u8] = unsafe {
            std::slice::from_raw_parts_mut(
                (run as *mut kvm_run as *mut u8).add(data_offset),
                data_len,
            )
        };
        // For string (repeated) I/O, the buffer holds `count` elements of `size` bytes each;
        // dispatch them one at a time.
        let data_chunks = buffer.chunks_mut(size);

        if io.direction == KVM_EXIT_IO_IN as u8 {
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                });
            }
        } else {
            debug_assert_eq!(io.direction, KVM_EXIT_IO_OUT as u8);
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                });
            }
        }

        Ok(())
    }

    fn handle_hypercall(
        &self,
        handle_fn: &mut dyn FnMut(&mut HypercallAbi) -> anyhow::Result<()>,
    ) -> anyhow::Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                self.handle_smccc_call(handle_fn)
            } else {
                let _ = handle_fn;
                unimplemented!("KvmVcpu::handle_hypercall() not supported");
            }
        }
    }
}

impl KvmVcpu {
    /// Gets the vCPU's current "multiprocessing state".
    ///
    /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after a call to
    /// `Vm::create_irq_chip`.
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // SAFETY: Safe because `kvm_mp_state` is a plain data struct, so zero-initialization is
        // valid; the kernel fills it in below.
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
        // write the correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }

    /// Sets the vCPU's current "multiprocessing state".
    ///
    /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after a call to
    /// `Vm::create_irq_chip`.
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
        // read the correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}

impl AsRawDescriptor for KvmVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

impl TryFrom<HypervisorCap> for KvmCap {
    type Error = Error;

    fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
        match cap {
            HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
            HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
        }
    }
}

fn to_kvm_irq_routing_entry(item: &IrqRoute, cap_msi_devid: bool) -> kvm_irq_routing_entry {
    match &item.source {
        IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
            gsi: item.gsi,
            type_: KVM_IRQ_ROUTING_IRQCHIP,
            u: kvm_irq_routing_entry__bindgen_ty_1 {
                irqchip: kvm_irq_routing_irqchip {
                    irqchip: chip_to_kvm_chip(*chip),
                    pin: *pin,
                },
            },
            ..Default::default()
        },
        IrqSource::Msi {
            address,
            data,
            #[cfg(target_arch = "aarch64")]
            pci_address,
        } => {
            // An MSI routing entry should only carry a device ID when the VM reports
            // KVM_CAP_MSI_DEVID, which is only expected on aarch64.
            let devid = if cap_msi_devid {
                #[cfg(not(target_arch = "aarch64"))]
                panic!("unexpected KVM_CAP_MSI_DEVID");
                #[cfg(target_arch = "aarch64")]
                Some(pci_address.to_u32())
            } else {
                None
            };
            kvm_irq_routing_entry {
                gsi: item.gsi,
                type_: KVM_IRQ_ROUTING_MSI,
                flags: if devid.is_some() {
                    KVM_MSI_VALID_DEVID
                } else {
                    0
                },
                u: kvm_irq_routing_entry__bindgen_ty_1 {
                    msi: kvm_irq_routing_msi {
                        address_lo: *address as u32,
                        address_hi: (*address >> 32) as u32,
                        data: *data,
                        __bindgen_anon_1: kvm_irq_routing_msi__bindgen_ty_1 {
                            devid: devid.unwrap_or_default(),
                        },
                    },
                },
                ..Default::default()
            }
        }
    }
}

impl From<&kvm_mp_state> for MPState {
    fn from(item: &kvm_mp_state) -> Self {
        match item.mp_state {
            KVM_MP_STATE_RUNNABLE => MPState::Runnable,
            KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
            KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
            KVM_MP_STATE_HALTED => MPState::Halted,
            KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
            KVM_MP_STATE_STOPPED => MPState::Stopped,
            state => {
                error!(
                    "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
                    state
                );
                MPState::Runnable
            }
        }
    }
}

impl From<&MPState> for kvm_mp_state {
    fn from(item: &MPState) -> Self {
        kvm_mp_state {
            mp_state: match item {
                MPState::Runnable => KVM_MP_STATE_RUNNABLE,
                MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
                MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
                MPState::Halted => KVM_MP_STATE_HALTED,
                MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
                MPState::Stopped => KVM_MP_STATE_STOPPED,
            },
        }
    }
}