1#[cfg(target_arch = "aarch64")]
6mod aarch64;
7#[cfg(target_arch = "riscv64")]
8mod riscv64;
9#[cfg(target_arch = "x86_64")]
10mod x86_64;
11
12mod cap;
13
14use std::cmp::Reverse;
15use std::collections::BTreeMap;
16use std::collections::BinaryHeap;
17use std::convert::TryFrom;
18use std::ffi::CString;
19use std::fs::File;
20use std::os::raw::c_ulong;
21use std::os::raw::c_void;
22use std::os::unix::prelude::OsStrExt;
23use std::path::Path;
24use std::sync::Arc;
25use std::sync::OnceLock;
26
27#[cfg(target_arch = "aarch64")]
28pub use aarch64::*;
29use base::errno_result;
30use base::error;
31use base::ioctl;
32use base::ioctl_with_mut_ref;
33use base::ioctl_with_ref;
34use base::ioctl_with_val;
35use base::pagesize;
36use base::AsRawDescriptor;
37use base::Error;
38use base::Event;
39use base::FromRawDescriptor;
40use base::MappedRegion;
41use base::MemoryMapping;
42use base::MemoryMappingBuilder;
43use base::MmapError;
44use base::Protection;
45use base::RawDescriptor;
46use base::Result;
47use base::SafeDescriptor;
48pub use cap::KvmCap;
49use cfg_if::cfg_if;
50use kvm_sys::*;
51use libc::open64;
52use libc::EFAULT;
53use libc::EINVAL;
54use libc::EIO;
55use libc::ENOENT;
56use libc::ENOSPC;
57use libc::ENOSYS;
58#[cfg(not(target_arch = "aarch64"))]
59use libc::ENOTSUP;
60use libc::EOVERFLOW;
61use libc::O_CLOEXEC;
62use libc::O_RDWR;
63#[cfg(target_arch = "riscv64")]
64use riscv64::*;
65use sync::Mutex;
66use vm_memory::GuestAddress;
67use vm_memory::GuestMemory;
68#[cfg(target_arch = "x86_64")]
69pub use x86_64::*;
70use zerocopy::FromZeros;
71
72use crate::BalloonEvent;
73use crate::ClockState;
74use crate::Config;
75use crate::Datamatch;
76use crate::DeviceKind;
77use crate::HypercallAbi;
78use crate::Hypervisor;
79use crate::HypervisorCap;
80use crate::HypervisorKind;
81use crate::IoEventAddress;
82use crate::IoOperation;
83use crate::IoParams;
84use crate::IrqRoute;
85use crate::IrqSource;
86use crate::MPState;
87use crate::MemCacheType;
88use crate::MemSlot;
89use crate::Vcpu;
90use crate::VcpuExit;
91use crate::VcpuSignalHandle;
92use crate::VcpuSignalHandleInner;
93use crate::Vm;
94use crate::VmCap;
95
96unsafe fn set_user_memory_region(
102 kvm: &KvmVm,
103 slot: MemSlot,
104 read_only: bool,
105 log_dirty_pages: bool,
106 cache: MemCacheType,
107 guest_addr: u64,
108 memory_size: u64,
109 userspace_addr: *mut u8,
110) -> Result<()> {
111 let mut use_2_variant = false;
112 let mut flags = 0;
113 if read_only {
114 flags |= KVM_MEM_READONLY;
115 }
116 if log_dirty_pages {
117 flags |= KVM_MEM_LOG_DIRTY_PAGES;
118 }
119 if kvm.caps.user_noncoherent_dma && cache == MemCacheType::CacheNonCoherent {
120 flags |= KVM_MEM_NON_COHERENT_DMA;
121 use_2_variant = kvm.caps.user_memory_region2;
122 }
123
124 let untagged_userspace_addr = untagged_addr(userspace_addr as usize);
125 let ret = if use_2_variant {
126 let region2 = kvm_userspace_memory_region2 {
127 slot,
128 flags,
129 guest_phys_addr: guest_addr,
130 memory_size,
131 userspace_addr: untagged_userspace_addr as u64,
132 guest_memfd_offset: 0,
133 guest_memfd: 0,
134 ..Default::default()
135 };
136 ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION2, ®ion2)
137 } else {
138 let region = kvm_userspace_memory_region {
139 slot,
140 flags,
141 guest_phys_addr: guest_addr,
142 memory_size,
143 userspace_addr: (untagged_userspace_addr as u64),
144 };
145 ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION, ®ion)
146 };
147
148 if ret == 0 {
149 Ok(())
150 } else {
151 errno_result()
152 }
153}
154
#[inline]
/// Clears any pointer-tag bits from `addr`.
///
/// On aarch64 the top byte of a pointer may carry a tag that must not be passed to the
/// kernel in a memory-region address; on every other architecture the address is
/// returned unchanged.
fn untagged_addr(addr: usize) -> usize {
    // Top-byte tag bits used on aarch64; zero elsewhere so the mask below is a no-op.
    const AARCH64_TAG_BITS: u64 = 0xFF00000000000000;
    let tag_bits: u64 = if cfg!(target_arch = "aarch64") {
        AARCH64_TAG_BITS
    } else {
        0
    };
    addr & !tag_bits as usize
}
167
168pub fn dirty_log_bitmap_size(size: usize) -> usize {
175 let page_size = pagesize();
176 size.div_ceil(page_size).div_ceil(8)
177}
178
/// A handle to the KVM device (`/dev/kvm` by default) used to create VMs and query
/// hypervisor-wide properties.
pub struct Kvm {
    // Open descriptor for the KVM device node.
    kvm: SafeDescriptor,
    // Byte size of each vCPU's shared `kvm_run` mmap, as reported by KVM_GET_VCPU_MMAP_SIZE.
    vcpu_mmap_size: usize,
}
183
impl Kvm {
    /// Opens the KVM device at `device_path` and validates the kernel's KVM API version.
    ///
    /// Returns an error if the device cannot be opened, if the kernel reports an API
    /// version different from the one this code was built against, or if the vCPU mmap
    /// size cannot be queried.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        // NOTE(review): unwrap panics if the path contains an interior NUL byte.
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY: `c_path` is a valid NUL-terminated string; open64 returns an owned fd.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY: `ret` is a freshly opened descriptor owned by no one else.
        let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };

        // SAFETY: KVM_GET_API_VERSION takes no arguments and touches no user memory.
        let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION) };
        if version < 0 {
            return errno_result();
        }

        // Refuse to run against a kernel exposing a different KVM API version.
        if version as u32 != KVM_API_VERSION {
            error!(
                "KVM_GET_API_VERSION: expected {}, got {}",
                KVM_API_VERSION, version,
            );
            return Err(Error::new(ENOSYS));
        }

        // SAFETY: KVM_GET_VCPU_MMAP_SIZE takes no arguments and touches no user memory.
        let res = unsafe { ioctl(&kvm, KVM_GET_VCPU_MMAP_SIZE) };
        if res <= 0 {
            return errno_result();
        }
        let vcpu_mmap_size = res as usize;

        Ok(Kvm {
            kvm,
            vcpu_mmap_size,
        })
    }

    /// Opens `/dev/kvm`, the default KVM device node.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }
}
233
impl AsRawDescriptor for Kvm {
    /// Returns the raw descriptor of the underlying KVM device handle.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}
239
impl Hypervisor for Kvm {
    /// Duplicates the KVM device descriptor; the mmap size is a plain copy.
    fn try_clone(&self) -> Result<Self> {
        Ok(Kvm {
            kvm: self.kvm.try_clone()?,
            vcpu_mmap_size: self.vcpu_mmap_size,
        })
    }

    /// Checks whether KVM supports the given capability; caps with no KVM equivalent
    /// map to `false`.
    fn check_capability(&self, cap: HypervisorCap) -> bool {
        if let Ok(kvm_cap) = KvmCap::try_from(cap) {
            // SAFETY: KVM_CHECK_EXTENSION takes only an integer argument and touches no
            // user memory.
            unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, kvm_cap as c_ulong) == 1 }
        } else {
            false
        }
    }
}
260
// Capability query results cached per-VM so hot paths avoid repeating
// KVM_CHECK_EXTENSION ioctls.
#[derive(Clone, Default)]
struct KvmVmCaps {
    // Result of checking KvmCap::KvmclockCtrl at VM creation.
    kvmclock_ctrl: bool,
    // Result of checking KvmCap::MemNoncoherentDma at VM creation.
    user_noncoherent_dma: bool,
    // Result of checking KvmCap::UserMemory2 at VM creation.
    user_memory_region2: bool,
    // Result of checking KvmCap::MsiDevid; initialized lazily on first set_gsi_routing
    // call and shared across clones.
    msi_devid: Arc<OnceLock<bool>>,
}
271
/// A KVM virtual machine: the VM descriptor plus guest memory and memory-slot
/// bookkeeping shared across clones.
pub struct KvmVm {
    kvm: Kvm,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    // User-added memory regions keyed by KVM slot number; shared across clones.
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
    // Freed slot numbers available for reuse; `Reverse` turns the max-heap into a
    // min-heap so the lowest free slot is reused first.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    caps: KvmVmCaps,
    // When set, the VM reports no read-only memory region support regardless of KVM caps.
    force_disable_readonly_mem: bool,
}
283
impl KvmVm {
    /// Constructs a new `KvmVm` using the given `Kvm` instance: creates the VM, caches
    /// capability results, runs arch-specific init, and registers all `guest_mem`
    /// regions as KVM memory slots.
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
        // SAFETY: KVM_CREATE_VM takes only an integer VM-type argument and returns a new
        // owned descriptor on success.
        let ret = unsafe {
            ioctl_with_val(
                kvm,
                KVM_CREATE_VM,
                kvm.get_vm_type(cfg.protection_type)? as c_ulong,
            )
        };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY: `ret` is a freshly created VM descriptor owned by no one else.
        let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
        let mut vm = KvmVm {
            kvm: kvm.try_clone()?,
            vm: vm_descriptor,
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            caps: Default::default(),
            force_disable_readonly_mem: cfg.force_disable_readonly_mem,
        };
        // Cache capability checks up front so later calls don't repeat the ioctls.
        vm.caps.kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
        vm.caps.user_noncoherent_dma = vm.check_raw_capability(KvmCap::MemNoncoherentDma);
        vm.caps.user_memory_region2 = vm.check_raw_capability(KvmCap::UserMemory2);

        vm.init_arch(&cfg)?;

        // Register each guest memory region as a KVM slot, using the region index as the
        // slot number (user-added regions later get slots above these).
        for region in vm.guest_mem.regions() {
            // SAFETY: the guest memory mapping is owned by `vm.guest_mem`, which lives
            // as long as the VM, so the host address stays valid for the slot.
            unsafe {
                set_user_memory_region(
                    &vm,
                    region.index as MemSlot,
                    false,
                    false,
                    MemCacheType::CacheCoherent,
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }?;
        }

        Ok(vm)
    }

    /// Creates vCPU number `id` for this VM and maps its shared `kvm_run` region.
    pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
        // SAFETY: KVM_CREATE_VCPU takes only an integer id and returns a new owned
        // descriptor on success.
        let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU, c_ulong::try_from(id).unwrap()) };
        if fd < 0 {
            return errno_result();
        }

        // SAFETY: `fd` is a freshly created vCPU descriptor owned by no one else.
        let vcpu = unsafe { File::from_raw_descriptor(fd) };

        // Map the kernel's kvm_run structure for this vCPU; the size comes from
        // KVM_GET_VCPU_MMAP_SIZE cached on `self.kvm`.
        let run_mmap = MemoryMappingBuilder::new(self.kvm.vcpu_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            cap_kvmclock_ctrl: self.caps.kvmclock_ctrl,
            run_mmap: Arc::new(run_mmap),
        })
    }

    /// Creates the in-kernel interrupt controller for this VM.
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY: KVM_CREATE_IRQCHIP takes no arguments and touches no user memory.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level of IRQ line `irq` to active (`true`) or inactive (`false`).
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY: `irq_level` is a valid, fully-initialized struct that outlives the
        // ioctl call.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers `evt` to trigger GSI `gsi` when signaled; if `resample_evt` is given,
    /// KVM signals it when the interrupt is acknowledged (resample mode).
    pub fn register_irqfd(
        &self,
        gsi: u32,
        evt: &Event,
        resample_evt: Option<&Event>,
    ) -> Result<()> {
        let mut irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };

        if let Some(r_evt) = resample_evt {
            irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
        }

        // SAFETY: `irqfd` is a valid, fully-initialized struct that outlives the ioctl.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Removes a previously-registered irqfd binding of `evt` to GSI `gsi`.
    pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY: `irqfd` is a valid, fully-initialized struct that outlives the ioctl.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Replaces the VM's entire GSI routing table with `routes`.
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        // Allocate a zeroed, variable-length kvm_irq_routing with one entry per route.
        let mut irq_routing =
            kvm_irq_routing::<[kvm_irq_routing_entry]>::new_box_zeroed_with_elems(routes.len())
                .unwrap();
        irq_routing.nr = routes.len() as u32;

        // Lazily determine (once per VM) whether MSI routes may carry a device id.
        let cap_msi_devid = *self
            .caps
            .msi_devid
            .get_or_init(|| self.check_raw_capability(KvmCap::MsiDevid));

        for (route, irq_route) in routes.iter().zip(irq_routing.entries.iter_mut()) {
            *irq_route = to_kvm_irq_routing_entry(route, cap_msi_devid);
        }

        // SAFETY: `irq_routing` is fully initialized, its `nr` matches the allocated
        // entry count, and it outlives the ioctl call.
        #[allow(clippy::undocumented_unsafe_blocks)]
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &*irq_routing) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers (or, with `deassign`, unregisters) an ioeventfd for `addr`, optionally
    /// matched against the written value per `datamatch`.
    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        // Decode the datamatch variant into (match enabled, value, access length).
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
        }
        if let IoEventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoEventAddress::Pio(p) => p,
                IoEventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY: `ioeventfd` is a valid, fully-initialized struct that outlives the
        // ioctl call.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Checks a raw KVM capability on the VM descriptor.
    ///
    /// Most capabilities report 1 when available, but BusLockDetect returns a bitmask
    /// that must contain KVM_BUS_LOCK_DETECTION_EXIT.
    pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
        // SAFETY: KVM_CHECK_EXTENSION takes only an integer argument and touches no
        // user memory.
        let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, capability as c_ulong) };
        match capability {
            #[cfg(target_arch = "x86_64")]
            KvmCap::BusLockDetect => {
                if ret > 0 {
                    ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
                } else {
                    false
                }
            }
            _ => ret == 1,
        }
    }

    /// Enables a raw KVM capability on this VM with the given flags and arguments.
    ///
    /// # Safety
    ///
    /// The caller must ensure the capability, flags, and args are a combination the
    /// kernel accepts and that enabling it does not violate invariants elsewhere.
    #[allow(dead_code)]
    unsafe fn enable_raw_capability(
        &self,
        capability: KvmCap,
        flags: u32,
        args: &[u64; 4],
    ) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap: capability as u32,
            args: *args,
            flags,
            ..Default::default()
        };
        // `kvm_cap` is a valid, fully-initialized struct that outlives the ioctl call.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    // Balloon inflate: release the host pages backing the given guest range.
    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        match self.guest_mem.remove_range(guest_address, size) {
            Ok(_) => Ok(()),
            // Surface the underlying errno when the failure was a syscall error.
            Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    // Balloon deflate is a no-op: pages are faulted back in on demand.
    fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
        Ok(())
    }
}
602
impl Vm for KvmVm {
    /// Clones the VM handle; memory-slot state (`mem_regions`, `mem_slot_gaps`) is
    /// shared via `Arc` so all clones see the same slots.
    fn try_clone(&self) -> Result<Self> {
        Ok(KvmVm {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            caps: self.caps.clone(),
            force_disable_readonly_mem: self.force_disable_readonly_mem,
        })
    }

    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        self.vm.try_clone()
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Kvm
    }

    /// Checks a hypervisor-agnostic VM capability, deferring to the arch-specific
    /// override first.
    fn check_capability(&self, c: VmCap) -> bool {
        if let Some(val) = self.check_capability_arch(c) {
            return val;
        }
        match c {
            #[cfg(target_arch = "aarch64")]
            VmCap::ArmPmuV3 => self.check_raw_capability(KvmCap::ArmPmuV3),
            VmCap::DirtyLog => true,
            VmCap::PvClock => false,
            VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
            VmCap::EarlyInitCpuid => false,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
            // Read-only regions may be administratively disabled even when KVM has them.
            VmCap::ReadOnlyMemoryRegion => {
                !self.force_disable_readonly_mem && self.check_raw_capability(KvmCap::ReadonlyMem)
            }
            // Requires both the build feature and kernel support.
            VmCap::MemNoncoherentDma => {
                cfg!(feature = "noncoherent-dma")
                    && self.check_raw_capability(KvmCap::MemNoncoherentDma)
            }
            #[cfg(target_arch = "aarch64")]
            VmCap::Sve => self.check_raw_capability(KvmCap::Sve),
        }
    }

    /// Enables a VM capability; returns Ok(false) for caps that need no explicit enable.
    fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
        match c {
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => {
                let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
                Ok(
                    // SAFETY: the args array matches what KVM expects for this cap.
                    #[allow(clippy::undocumented_unsafe_blocks)]
                    unsafe {
                        self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
                    },
                )
            }
            _ => Ok(false),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        self.kvm.get_guest_phys_addr_bits()
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    /// Registers `mem` as a new KVM memory slot at `guest_addr`, reusing the lowest
    /// freed slot number if any, and returns the slot.
    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        cache: MemCacheType,
    ) -> Result<MemSlot> {
        let pgsz = pagesize() as u64;
        // KVM requires page-granular sizes; round up to the next page boundary.
        let size = (mem.size() as u64).next_multiple_of(pgsz);
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        // Reject overlaps with the base guest memory layout.
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        // Prefer a recycled slot; otherwise take the next slot above all existing ones.
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // SAFETY: `mem` is moved into `mem_regions` below on success, so the host
        // mapping stays alive for as long as the slot is registered.
        let res = unsafe {
            set_user_memory_region(
                self,
                slot,
                read_only,
                log_dirty_pages,
                cache,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            // Return the slot number to the free pool on failure.
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    /// Enables guest hypercall forwarding for `count` hypercall numbers starting at
    /// `nr`; only supported on aarch64 (via SMCCC forwarding).
    fn enable_hypercalls(&mut self, nr: u64, count: usize) -> Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                let base = u32::try_from(nr).unwrap();
                let nr_functions = u32::try_from(count).unwrap();
                self.enable_smccc_forwarding(base, nr_functions)
            } else {
                let _ = nr;
                let _ = count;
                Err(Error::new(ENOTSUP))
            }
        }
    }

    /// Flushes `size` bytes at `offset` within the given memory slot back to its
    /// backing store.
    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    /// Advises the kernel to page out the given range of the slot (MADV_PAGEOUT).
    fn madvise_pageout_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_PAGEOUT)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    /// Frees the backing pages of the given range of the slot (MADV_REMOVE).
    fn madvise_remove_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_REMOVE)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    /// Unregisters the memory slot from KVM, recycles the slot number, and returns the
    /// backing region to the caller.
    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY: a zero-sized region with a null address removes the slot; no host
        // memory is handed to the kernel.
        unsafe {
            set_user_memory_region(
                self,
                slot,
                false,
                false,
                MemCacheType::CacheCoherent,
                0,
                0,
                std::ptr::null_mut(),
            )?;
        }
        self.mem_slot_gaps.lock().push(Reverse(slot));
        // Unwrap is safe: presence was checked above and the lock is still held.
        Ok(regions.remove(&slot).unwrap())
    }

    /// Creates an in-kernel device of the given kind and returns its descriptor.
    fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
        // Arch-specific device parameters take precedence over the generic mapping.
        let mut device = if let Some(dev) = self.get_device_params_arch(kind) {
            dev
        } else {
            match kind {
                DeviceKind::Vfio => kvm_create_device {
                    type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
                    fd: 0,
                    flags: 0,
                },

                #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
                _ => return Err(Error::new(libc::ENXIO)),
            }
        };

        // SAFETY: `device` is a valid struct; the kernel writes the new device fd into
        // its `fd` field.
        let ret = unsafe { base::ioctl_with_mut_ref(self, KVM_CREATE_DEVICE, &mut device) };
        if ret == 0 {
            Ok(
                // SAFETY: on success the kernel returned a new descriptor in `device.fd`
                // that is owned by no one else.
                unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) },
            )
        } else {
            errno_result()
        }
    }

    /// Fetches (and resets) the dirty-page bitmap for a user-added memory slot into
    /// `dirty_log`, which must be large enough for the slot's size.
    fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
        // Reject undersized buffers before handing the pointer to the kernel.
        if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
            return Err(Error::new(EINVAL));
        }

        let mut dirty_log_kvm = kvm_dirty_log {
            slot,
            ..Default::default()
        };
        dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
        // SAFETY: the bitmap pointer refers to `dirty_log`, whose length was validated
        // above and which outlives the ioctl call.
        let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    // No-op: KVM delivers ioevents in the kernel, so there is nothing to do here.
    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        self.get_pvclock_arch()
    }

    fn set_pvclock(&self, state: &ClockState) -> Result<()> {
        self.set_pvclock_arch(state)
    }

    /// Maps `fd` at `fd_offset` into an existing memory slot at the given offset.
    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    /// Removes a sub-mapping previously added with `add_fd_mapping`.
    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    /// Dispatches balloon events to the inflate/deflate handlers above.
    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }
}
937
impl AsRawDescriptor for KvmVm {
    /// Returns the raw descriptor of the VM (not the KVM device) handle.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}
943
// Allows requesting a vCPU exit while holding only the shared `kvm_run` mapping,
// without a reference to the full `KvmVcpu`.
struct KvmVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}
947
impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
    /// Sets the `immediate_exit` flag in the vCPU's shared `kvm_run` structure,
    /// requesting that the next (or current) KVM_RUN return to userspace.
    fn signal_immediate_exit(&self) {
        // SAFETY: the mapping is kept alive by the Arc, points to the kernel's kvm_run
        // structure for this vCPU, and only a single byte-sized field is written.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut kvm_run;
            (*run).immediate_exit = 1;
        }
    }
}
958
/// A KVM vCPU: the vCPU descriptor plus the shared `kvm_run` mapping used to exchange
/// exit information with the kernel.
pub struct KvmVcpu {
    kvm: Kvm,
    vm: SafeDescriptor,
    vcpu: File,
    id: usize,
    // Cached KVM_CAP result copied from the VM at vCPU creation (see create_kvm_vcpu).
    cap_kvmclock_ctrl: bool,
    // Mapping of the kernel's kvm_run structure for this vCPU.
    run_mmap: Arc<MemoryMapping>,
}
968
impl Vcpu for KvmVcpu {
    /// Clones the vCPU handle; the `kvm_run` mapping is shared via `Arc`.
    fn try_clone(&self) -> Result<Self> {
        let vm = self.vm.try_clone()?;
        let vcpu = self.vcpu.try_clone()?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm,
            vcpu,
            cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
            id: self.id,
            run_mmap: self.run_mmap.clone(),
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    fn id(&self) -> usize {
        self.id
    }

    /// Sets or clears the `immediate_exit` flag in the shared `kvm_run` structure.
    #[allow(clippy::cast_ptr_alignment)]
    fn set_immediate_exit(&self, exit: bool) {
        // SAFETY: the mapping is at least KVM_GET_VCPU_MMAP_SIZE bytes and holds the
        // kernel's kvm_run structure for this vCPU.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }

    /// Returns a handle that can signal this vCPU to exit without holding `self`.
    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(KvmVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    /// Notifies the kernel that the guest is being paused so kvmclock-based watchdogs
    /// don't fire spuriously; requires KVM_CAP_KVMCLOCK_CTRL.
    fn on_suspend(&self) -> Result<()> {
        if self.cap_kvmclock_ctrl {
            // SAFETY: KVM_KVMCLOCK_CTRL takes no arguments and touches no user memory.
            if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL) } != 0 {
                // EINVAL is tolerated here; any other errno is propagated.
                if Error::last().errno() != libc::EINVAL {
                    return errno_result();
                }
            }
        }

        Ok(())
    }

    /// Enables a raw KVM capability on this vCPU.
    ///
    /// # Safety
    ///
    /// The caller must ensure `cap` and `args` form a combination the kernel accepts.
    unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap,
            args: *args,
            ..Default::default()
        };
        // `kvm_cap` is a valid, fully-initialized struct that outlives the ioctl call.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Runs the vCPU until it exits, then translates the exit reason recorded in the
    /// shared `kvm_run` structure into a `VcpuExit`.
    #[allow(clippy::cast_ptr_alignment)]
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY: KVM_RUN takes no pointer arguments; results are written to run_mmap.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY: the mapping holds the kernel's kvm_run structure for this vCPU, which
        // KVM_RUN has just filled in.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };

        // Give arch-specific code first chance to interpret the exit (e.g. x86 I/O).
        if let Some(vcpu_exit) = self.handle_vm_exit_arch(run) {
            return Ok(vcpu_exit);
        }

        match run.exit_reason {
            KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
            KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
            KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
            KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
            KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
            KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown(Ok(()))),
            KVM_EXIT_FAIL_ENTRY => {
                // SAFETY: for this exit reason the kernel initialized the `fail_entry`
                // member of the kvm_run union.
                let hardware_entry_failure_reason = unsafe {
                    run.__bindgen_anon_1
                        .fail_entry
                        .hardware_entry_failure_reason
                };
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                })
            }
            KVM_EXIT_INTR => Ok(VcpuExit::Intr),
            KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
            KVM_EXIT_SYSTEM_EVENT => {
                // SAFETY: for this exit reason the kernel initialized the `system_event`
                // member of the kvm_run union.
                let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                let event_flags =
                    unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                match event_type {
                    KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
                    KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
                    KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
                    _ => {
                        error!(
                            "Unknown KVM system event {} with flags {}",
                            event_type, event_flags
                        );
                        Err(Error::new(EINVAL))
                    }
                }
            }
            r => panic!("unknown kvm exit reason: {r}"),
        }
    }

    /// Completes an MMIO exit by invoking `handle_fn` with the access parameters; for
    /// reads, the callback fills `data`, which KVM forwards to the guest on re-entry.
    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        // SAFETY: the mapping holds the kernel's kvm_run structure for this vCPU.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        assert!(run.exit_reason == KVM_EXIT_MMIO);
        // SAFETY: the exit reason was just checked, so the `mmio` union member is the
        // one the kernel initialized.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let data = &mut mmio.data[..mmio.len as usize];
        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Write(data),
            })
        } else {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Read(data),
            })
        }
    }

    /// Completes a port-I/O exit by invoking `handle_fn` once per element of the I/O
    /// string operation; data lives at `data_offset` inside the kvm_run mapping.
    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        // SAFETY: the mapping holds the kernel's kvm_run structure for this vCPU.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        assert!(run.exit_reason == KVM_EXIT_IO);
        // SAFETY: the exit reason was just checked, so the `io` union member is the one
        // the kernel initialized.
        let io = unsafe { run.__bindgen_anon_1.io };
        let address = u64::from(io.port);
        let size = usize::from(io.size);
        let count = io.count as usize;
        let data_len = count * size;
        let data_offset = io.data_offset as usize;
        // Ensure the kernel-provided offset/length stay inside the mapping.
        assert!(data_offset + data_len <= self.run_mmap.size());

        // SAFETY: the range was bounds-checked against the mapping size above, and the
        // buffer lives inside the run mmap for the duration of this call.
        let buffer: &mut [u8] = unsafe {
            std::slice::from_raw_parts_mut(
                (run as *mut kvm_run as *mut u8).add(data_offset),
                data_len,
            )
        };
        let data_chunks = buffer.chunks_mut(size);

        if io.direction == KVM_EXIT_IO_IN as u8 {
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                });
            }
        } else {
            debug_assert_eq!(io.direction, KVM_EXIT_IO_OUT as u8);
            for data in data_chunks {
                handle_fn(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                });
            }
        }

        Ok(())
    }

    /// Completes a hypercall exit; only implemented on aarch64 (SMCCC calls).
    fn handle_hypercall(
        &self,
        handle_fn: &mut dyn FnMut(&mut HypercallAbi) -> anyhow::Result<()>,
    ) -> anyhow::Result<()> {
        cfg_if! {
            if #[cfg(target_arch = "aarch64")] {
                self.handle_smccc_call(handle_fn)
            } else {
                let _ = handle_fn;
                unimplemented!("KvmVcpu::handle_hypercall() not supported");
            }
        }
    }
}
1208
impl KvmVcpu {
    /// Reads this vCPU's multiprocessing state (runnable, halted, ...) from KVM.
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // SAFETY: kvm_mp_state is a plain integer wrapper, so zeroed is a valid value.
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        let ret = {
            // SAFETY: the kernel only writes into `state`, which outlives the ioctl.
            unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }

    /// Writes this vCPU's multiprocessing state to KVM.
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        let ret = {
            // SAFETY: the kernel only reads from `state`, which outlives the ioctl.
            unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}
1252
impl AsRawDescriptor for KvmVcpu {
    /// Returns the raw descriptor of the vCPU handle.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}
1258
impl TryFrom<HypervisorCap> for KvmCap {
    type Error = Error;

    /// Maps a hypervisor-agnostic capability to its KVM equivalent; capabilities with
    /// no KVM counterpart return EINVAL.
    fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
        match cap {
            HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
            HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
        }
    }
}
1275
/// Converts a hypervisor-agnostic `IrqRoute` into KVM's routing-entry representation.
///
/// `cap_msi_devid` indicates whether the kernel accepts a device id on MSI routes
/// (KVM_MSI_VALID_DEVID); on aarch64 the PCI address supplies that id.
fn to_kvm_irq_routing_entry(item: &IrqRoute, cap_msi_devid: bool) -> kvm_irq_routing_entry {
    match &item.source {
        IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
            gsi: item.gsi,
            type_: KVM_IRQ_ROUTING_IRQCHIP,
            u: kvm_irq_routing_entry__bindgen_ty_1 {
                irqchip: kvm_irq_routing_irqchip {
                    irqchip: chip_to_kvm_chip(*chip),
                    pin: *pin,
                },
            },
            ..Default::default()
        },
        IrqSource::Msi {
            address,
            data,
            #[cfg(target_arch = "aarch64")]
            pci_address,
        } => {
            // The devid is only meaningful on aarch64, where the MSI route carries the
            // originating PCI device's address; elsewhere the capability should never
            // be reported.
            let devid = if cap_msi_devid {
                #[cfg(not(target_arch = "aarch64"))]
                panic!("unexpected KVM_CAP_MSI_DEVID");
                #[cfg(target_arch = "aarch64")]
                Some(pci_address.to_u32())
            } else {
                None
            };
            kvm_irq_routing_entry {
                gsi: item.gsi,
                type_: KVM_IRQ_ROUTING_MSI,
                flags: if devid.is_some() {
                    KVM_MSI_VALID_DEVID
                } else {
                    0
                },
                u: kvm_irq_routing_entry__bindgen_ty_1 {
                    msi: kvm_irq_routing_msi {
                        // 64-bit MSI address split across the lo/hi fields.
                        address_lo: *address as u32,
                        address_hi: (*address >> 32) as u32,
                        data: *data,
                        __bindgen_anon_1: kvm_irq_routing_msi__bindgen_ty_1 {
                            devid: devid.unwrap_or_default(),
                        },
                    },
                },
                ..Default::default()
            }
        }
    }
}
1329
impl From<&kvm_mp_state> for MPState {
    /// Converts a KVM mp_state constant to the hypervisor-agnostic enum; unknown
    /// values are logged and mapped to `Runnable` rather than failing.
    fn from(item: &kvm_mp_state) -> Self {
        match item.mp_state {
            KVM_MP_STATE_RUNNABLE => MPState::Runnable,
            KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
            KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
            KVM_MP_STATE_HALTED => MPState::Halted,
            KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
            KVM_MP_STATE_STOPPED => MPState::Stopped,
            state => {
                error!(
                    "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
                    state
                );
                MPState::Runnable
            }
        }
    }
}
1349
1350impl From<&MPState> for kvm_mp_state {
1351 fn from(item: &MPState) -> Self {
1352 kvm_mp_state {
1353 mp_state: match item {
1354 MPState::Runnable => KVM_MP_STATE_RUNNABLE,
1355 MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
1356 MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
1357 MPState::Halted => KVM_MP_STATE_HALTED,
1358 MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
1359 MPState::Stopped => KVM_MP_STATE_STOPPED,
1360 },
1361 }
1362 }
1363}