1use std::collections::HashMap;
6use std::ffi::CString;
7use std::fs::File;
8use std::fs::OpenOptions;
9use std::io;
10use std::mem;
11use std::os::raw::c_ulong;
12use std::os::unix::prelude::FileExt;
13use std::path::Path;
14use std::path::PathBuf;
15#[cfg(all(target_os = "android", target_arch = "aarch64"))]
16use std::ptr::addr_of_mut;
17use std::result;
18use std::slice;
19use std::sync::Arc;
20use std::sync::OnceLock;
21
22use base::error;
23use base::ioctl;
24use base::ioctl_with_mut_ptr;
25use base::ioctl_with_mut_ref;
26use base::ioctl_with_ptr;
27use base::ioctl_with_ref;
28use base::ioctl_with_val;
29use base::warn;
30use base::AsRawDescriptor;
31use base::Error;
32use base::Event;
33use base::FromRawDescriptor;
34use base::RawDescriptor;
35use base::SafeDescriptor;
36use cfg_if::cfg_if;
37use data_model::vec_with_array_field;
38use hypervisor::DeviceKind;
39use hypervisor::Vm;
40use rand::seq::index::sample;
41use remain::sorted;
42use resources::address_allocator::AddressAllocator;
43use resources::AddressRange;
44use resources::Alloc;
45use resources::Error as ResourcesError;
46use sync::Mutex;
47use thiserror::Error;
48use vfio_sys::vfio::vfio_acpi_dsm;
49use vfio_sys::vfio::VFIO_IRQ_SET_DATA_BOOL;
50use vfio_sys::*;
51use zerocopy::FromBytes;
52use zerocopy::Immutable;
53use zerocopy::IntoBytes;
54
55use crate::IommuDevType;
56
/// Errors that can occur while opening or operating a VFIO container, group,
/// or device. Variant messages carry the failing operation; most wrap the
/// `errno`-derived `base::Error` captured right after the failed syscall.
#[sorted]
#[derive(Error, Debug)]
pub enum VfioError {
    #[error("failed to duplicate VfioContainer")]
    ContainerDupError,
    #[error("failed to set container's IOMMU driver type as {0:?}: {1}")]
    ContainerSetIOMMU(IommuType, Error),
    #[error("failed to create KVM vfio device")]
    CreateVfioKvmDevice,
    #[error("failed to get Group Status: {0}")]
    GetGroupStatus(Error),
    #[error("failed to get vfio device fd: {0}")]
    GroupGetDeviceFD(Error),
    #[error("failed to add vfio group into vfio container: {0}")]
    GroupSetContainer(Error),
    #[error("group is inviable")]
    GroupViable,
    #[error("invalid region index: {0}")]
    InvalidIndex(usize),
    #[error("invalid operation")]
    InvalidOperation,
    #[error("invalid file path")]
    InvalidPath,
    #[error("failed to add guest memory map into iommu table: {0}")]
    IommuDmaMap(Error),
    #[error("failed to remove guest memory map from iommu table: {0}")]
    IommuDmaUnmap(Error),
    #[error("failed to get IOMMU cap info from host")]
    IommuGetCapInfo,
    #[error("failed to get IOMMU info from host: {0}")]
    IommuGetInfo(Error),
    #[error("failed to attach device to pKVM pvIOMMU: {0}")]
    KvmPviommuSetConfig(Error),
    #[error("failed to set KVM vfio device's attribute: {0}")]
    KvmSetDeviceAttr(Error),
    #[error("AddressAllocator is unavailable")]
    NoRescAlloc,
    #[error("failed to open /dev/vfio/vfio container: {0}")]
    OpenContainer(io::Error),
    #[error("failed to open {1} group: {0}")]
    OpenGroup(io::Error, String),
    #[error("failed to read {1} link: {0}")]
    ReadLink(io::Error, PathBuf),
    #[error("resources error: {0}")]
    Resources(ResourcesError),
    #[error("unknown vfio device type (flags: {0:#x})")]
    UnknownDeviceType(u32),
    #[error("failed to call vfio device's ACPI _DSM: {0}")]
    VfioAcpiDsm(Error),
    #[error("failed to disable vfio device's acpi notification: {0}")]
    VfioAcpiNotificationDisable(Error),
    #[error("failed to enable vfio device's acpi notification: {0}")]
    VfioAcpiNotificationEnable(Error),
    #[error("failed to test vfio device's acpi notification: {0}")]
    VfioAcpiNotificationTest(Error),
    #[error(
        "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
    )]
    VfioApiVersion,
    #[error("failed to get vfio device's info or info doesn't match: {0}")]
    VfioDeviceGetInfo(Error),
    #[error("failed to get vfio device's region info: {0}")]
    VfioDeviceGetRegionInfo(Error),
    #[error("container doesn't support IOMMU driver type {0:?}")]
    VfioIommuSupport(IommuType),
    #[error("failed to disable vfio device's irq: {0}")]
    VfioIrqDisable(Error),
    #[error("failed to enable vfio device's irq: {0}")]
    VfioIrqEnable(Error),
    #[error("failed to mask vfio device's irq: {0}")]
    VfioIrqMask(Error),
    #[error("failed to unmask vfio device's irq: {0}")]
    VfioIrqUnmask(Error),
    #[error("failed to enter vfio device's low power state: {0}")]
    VfioPmLowPowerEnter(Error),
    #[error("failed to exit vfio device's low power state: {0}")]
    VfioPmLowPowerExit(Error),
    #[error("failed to probe support for VFIO low power state entry: {0}")]
    VfioProbePmLowPowerEntry(Error),
    #[error("failed to probe support for VFIO low power state exit: {0}")]
    VfioProbePmLowPowerExit(Error),
}
139
/// Shorthand result type for all fallible VFIO operations in this module.
type Result<T> = std::result::Result<T, VfioError>;
141
/// Captures the most recent OS error (errno); called immediately after a
/// failed ioctl/syscall so the cause is not clobbered by later calls.
fn get_error() -> Error {
    Error::last()
}
145
// Process-wide KVM VFIO device descriptor, created lazily on first use.
// `None` is stored when creation failed; because `OnceLock` initializes only
// once, a failure is never retried.
static KVM_VFIO_FILE: OnceLock<Option<SafeDescriptor>> = OnceLock::new();
147
148fn create_kvm_vfio_file(vm: &impl Vm) -> Option<&'static SafeDescriptor> {
149 KVM_VFIO_FILE
150 .get_or_init(|| vm.create_device(DeviceKind::Vfio).ok())
151 .as_ref()
152}
153
154fn kvm_vfio_file() -> Option<&'static SafeDescriptor> {
155 match KVM_VFIO_FILE.get() {
156 Some(Some(v)) => Some(v),
157 _ => None,
158 }
159}
160
/// The flavor of a VFIO device (determined from the flags returned by the
/// device-info query elsewhere in this file).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VfioDeviceType {
    Pci,
    Platform,
}
166
// Whether a VFIO group should be added to or removed from the KVM VFIO
// device (see `VfioGroup::kvm_device_set_group`).
enum KvmVfioGroupOps {
    Add,
    Delete,
}
171
/// Handle to a pKVM pvIOMMU instance obtained through the KVM VFIO device.
#[derive(Debug)]
pub struct KvmVfioPviommu {
    // File descriptor returned by the KVM_DEV_VFIO_PVIOMMU_ATTACH attribute.
    file: File,
}
176
impl KvmVfioPviommu {
    /// Creates a new pvIOMMU instance via the KVM VFIO device.
    ///
    /// Only implemented for aarch64 Android builds; panics elsewhere.
    pub fn new(vm: &impl Vm) -> Result<Self> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let file = Self::ioctl_kvm_dev_vfio_pviommu_attach(vm)?;

                Ok(Self { file })
            } else {
                let _ = vm;
                unimplemented!()
            }
        }
    }

    /// Binds the device's stream-id index `sid_idx` to the virtual stream id
    /// `vsid` on this pvIOMMU.
    ///
    /// Only implemented for aarch64 Android builds; panics elsewhere.
    pub fn attach<T: AsRawDescriptor>(&self, device: &T, sid_idx: u32, vsid: u32) -> Result<()> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                self.ioctl_kvm_pviommu_set_config(device, sid_idx, vsid)
            } else {
                let _ = device;
                let _ = sid_idx;
                let _ = vsid;
                unimplemented!()
            }
        }
    }

    /// Returns an identifier for this pvIOMMU instance; the raw file
    /// descriptor value is used as the id.
    pub fn id(&self) -> u32 {
        let fd = self.as_raw_descriptor();
        // Descriptors are non-negative here, so the i32 -> u32 conversion
        // cannot fail.
        fd.try_into().unwrap()
    }

    /// Queries how many stream ids (SIDs) `device` exposes.
    ///
    /// Only implemented for aarch64 Android builds; panics elsewhere.
    pub fn get_sid_count<T: AsRawDescriptor>(vm: &impl Vm, device: &T) -> Result<u32> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let info = Self::ioctl_kvm_dev_vfio_pviommu_get_info(vm, device)?;

                Ok(info.nr_sids)
            } else {
                let _ = vm;
                let _ = device;
                unimplemented!()
            }
        }
    }

    // Issues KVM_SET_DEVICE_ATTR with KVM_DEV_VFIO_PVIOMMU_ATTACH; on success
    // the ioctl's return value is a new file descriptor for the pvIOMMU.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_attach(vm: &impl Vm) -> Result<File> {
        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_ATTACH as u64,
            addr: 0,
        };

        // SAFETY: `kvm_vfio_file` is a valid descriptor and `vfio_dev_attr`
        // outlives the ioctl call.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            // SAFETY: a non-negative return is treated as a fresh descriptor
            // that we now own — NOTE(review): relies on the pKVM ATTACH
            // attribute returning an fd; confirm against the kernel ABI.
            Ok(unsafe { File::from_raw_descriptor(ret) })
        }
    }

    // Issues KVM_PVIOMMU_SET_CONFIG on this pvIOMMU's fd to map one
    // (sid_idx, vsid) pair for `device`.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_pviommu_set_config<T: AsRawDescriptor>(
        &self,
        device: &T,
        sid_idx: u32,
        vsid: u32,
    ) -> Result<()> {
        let config = kvm_sys::kvm_vfio_iommu_config {
            size: mem::size_of::<kvm_sys::kvm_vfio_iommu_config>() as u32,
            device_fd: device.as_raw_descriptor(),
            sid_idx,
            vsid,
            __reserved: 0,
        };

        // SAFETY: `self` is a valid pvIOMMU descriptor and `config` outlives
        // the ioctl call.
        let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_PVIOMMU_SET_CONFIG, &config) };

        if ret < 0 {
            Err(VfioError::KvmPviommuSetConfig(get_error()))
        } else {
            Ok(())
        }
    }

    // Issues KVM_SET_DEVICE_ATTR with KVM_DEV_VFIO_PVIOMMU_GET_INFO; the
    // kernel fills `info` (notably `nr_sids`) through the `addr` pointer.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_get_info<T: AsRawDescriptor>(
        vm: &impl Vm,
        device: &T,
    ) -> Result<kvm_sys::kvm_vfio_iommu_info> {
        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;

        let mut info = kvm_sys::kvm_vfio_iommu_info {
            size: mem::size_of::<kvm_sys::kvm_vfio_iommu_info>() as u32,
            device_fd: device.as_raw_descriptor(),
            nr_sids: 0,
            __reserved: 0,
        };

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_GET_INFO as u64,
            addr: addr_of_mut!(info) as usize as u64,
        };

        // SAFETY: `info` stays alive and writable for the duration of the
        // ioctl, and `vfio_dev_attr` outlives the call.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            Ok(info)
        }
    }
}
308
impl AsRawDescriptor for KvmVfioPviommu {
    // Exposes the underlying pvIOMMU file descriptor.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.file.as_raw_descriptor()
    }
}
314
/// IOMMU driver types that can be requested with VFIO_SET_IOMMU.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
    PkvmPviommu = VFIO_PKVM_PVIOMMU,
    // ChromeOS-specific type1 variant; the literal 100001 is outside the
    // upstream VFIO UAPI constants (presumably a downstream kernel
    // extension — confirm against the ChromeOS kernel).
    Type1ChromeOS = 100001,
}
327
/// Wrapper around an open VFIO container (`/dev/vfio/vfio`) and the groups
/// attached to it.
pub struct VfioContainer {
    container: File,
    // Attached groups, keyed by IOMMU group id.
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
    // Set once an IOMMU driver has been selected via VFIO_SET_IOMMU;
    // `None` until the first group is attached.
    iommu_type: Option<IommuType>,
}
334
335fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T>
336where
337 T: FromBytes,
338{
339 Some(T::read_from_prefix(bytes.get(offset..)?).ok()?.0)
340}
341
// Expected result of VFIO_GET_API_VERSION; any other value means the kernel's
// VFIO UAPI does not match the bindings in vfio_sys.
const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
    /// Opens `/dev/vfio/vfio` and wraps it in a new, empty container.
    pub fn new() -> Result<Self> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        Self::new_from_container(container)
    }

    /// Wraps an already-open container file, verifying that the kernel's
    /// VFIO API version matches `VFIO_API_VERSION`.
    pub fn new_from_container(container: File) -> Result<Self> {
        // SAFETY: `container` is a valid open descriptor and this ioctl
        // takes no pointer argument.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
            iommu_type: None,
        })
    }

    /// Returns true if the group `group_id` is attached to this container.
    fn is_group_set(&self, group_id: u32) -> bool {
        self.groups.contains_key(&group_id)
    }

    /// Asks the kernel whether this container supports the IOMMU driver
    /// `val` (VFIO_CHECK_EXTENSION returns non-zero when supported).
    fn check_extension(&self, val: IommuType) -> bool {
        // SAFETY: `self` is a valid container descriptor and the argument is
        // passed by value.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION, val as c_ulong) };
        ret != 0
    }

    /// Selects the IOMMU driver for this container; returns the raw ioctl
    /// return value (0 on success).
    fn set_iommu(&mut self, val: IommuType) -> i32 {
        // SAFETY: `self` is a valid container descriptor and the argument is
        // passed by value.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU, val as c_ulong) }
    }

    /// Like `set_iommu`, but checks support first and records the chosen
    /// type in `self.iommu_type` on success.
    fn set_iommu_checked(&mut self, val: IommuType) -> Result<()> {
        if !self.check_extension(val) {
            Err(VfioError::VfioIommuSupport(val))
        } else if self.set_iommu(val) != 0 {
            Err(VfioError::ContainerSetIOMMU(val, get_error()))
        } else {
            self.iommu_type = Some(val);
            Ok(())
        }
    }

    /// Maps `[iova, iova + size)` in the IOMMU to the process virtual
    /// address range starting at `user_addr`. `write_en` additionally
    /// allows device writes to the mapping.
    ///
    /// # Safety
    ///
    /// The caller must ensure `[user_addr, user_addr + size)` remains valid
    /// mapped memory for as long as the DMA mapping exists.
    ///
    /// # Panics
    ///
    /// Panics if no IOMMU driver has been configured yet.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_map called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_map(iova, size, user_addr, write_en)
            }
            // The pvIOMMU driver has no type1-style map interface.
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    // Type1 implementation of `vfio_dma_map`; same safety requirements.
    unsafe fn vfio_iommu_type1_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        // `dma_map` outlives the ioctl; the caller guarantees the mapped
        // memory range is valid (this fn is unsafe for that reason).
        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA, &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    /// Removes the IOMMU mapping previously created for `[iova, iova + size)`.
    ///
    /// # Panics
    ///
    /// Panics if no IOMMU driver has been configured yet.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_unmap called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_unmap(iova, size)
            }
            // The pvIOMMU driver has no type1-style unmap interface.
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    // Type1 implementation of `vfio_dma_unmap`.
    fn vfio_iommu_type1_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
            ..Default::default()
        };

        // SAFETY: `dma_unmap` is a plain struct that stays alive and
        // writable for the duration of the ioctl.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA, &mut dma_unmap) };
        // The kernel writes back the actually-unmapped size; a partial unmap
        // is treated as a failure too.
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

    /// Returns the bitmask of IOMMU page sizes supported by this container.
    /// The pvIOMMU driver reports no page sizes (0).
    ///
    /// # Panics
    ///
    /// Panics if no IOMMU driver has been configured yet.
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        match self
            .iommu_type
            .expect("vfio_get_iommu_page_size_mask called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iommu_page_size_mask()
            }
            IommuType::PkvmPviommu => Ok(0),
        }
    }

    // Type1 implementation of the page-size-mask query.
    fn vfio_iommu_type1_get_iommu_page_size_mask(&self) -> Result<u64> {
        let mut iommu_info = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY: `iommu_info` stays alive and writable for the duration of
        // the ioctl.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info) };
        // The pgsizes field is only meaningful when the kernel set the
        // corresponding flag.
        if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        Ok(iommu_info.iova_pgsizes)
    }

    /// Returns the valid IOVA ranges of this container's IOMMU. The pvIOMMU
    /// driver reports an empty list.
    ///
    /// # Panics
    ///
    /// Panics if no IOMMU driver has been configured yet.
    pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        match self
            .iommu_type
            .expect("vfio_iommu_iova_get_iova_ranges called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iova_ranges()
            }
            IommuType::PkvmPviommu => Ok(Vec::new()),
        }
    }

    // Type1 implementation of the IOVA-ranges query: a first GET_INFO call
    // learns the needed buffer size, a second call fetches the capability
    // chain, which is then walked for the IOVA-range capability.
    fn vfio_iommu_type1_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        // First call: only learns the required `argsz` for the full reply.
        let mut iommu_info_argsz = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY: `iommu_info_argsz` stays alive and writable for the
        // duration of the ioctl.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info_argsz) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Second call: allocate enough trailing space for the capability
        // chain reported by the first call.
        let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
            iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
        );
        iommu_info[0].argsz = iommu_info_argsz.argsz;
        // SAFETY: the allocation behind `iommu_info` is at least `argsz`
        // bytes and outlives the ioctl.
        let ret =
            unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO, iommu_info.as_mut_ptr()) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        // SAFETY: the allocation holds at least `argsz` bytes, initialized
        // by the successful ioctl above.
        let info_bytes = unsafe {
            std::slice::from_raw_parts(
                iommu_info.as_ptr() as *const u8,
                iommu_info_argsz.argsz as usize,
            )
        };

        if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Walk the capability chain (offsets are relative to the start of
        // the info buffer; 0 terminates the chain).
        let mut offset = iommu_info[0].cap_offset as usize;
        while offset != 0 {
            let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset)
                .ok_or(VfioError::IommuGetCapInfo)?;

            if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
                let iova_header =
                    extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
                        info_bytes, offset,
                    )
                    .ok_or(VfioError::IommuGetCapInfo)?;
                // The `vfio_iova_range` entries follow the fixed-size part of
                // the capability struct.
                let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
                let mut ret = Vec::new();
                for i in 0..iova_header.nr_iovas {
                    ret.push(
                        extract_vfio_struct::<vfio_iova_range>(
                            info_bytes,
                            range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
                        )
                        .ok_or(VfioError::IommuGetCapInfo)?,
                    );
                }
                return Ok(ret
                    .iter()
                    .map(|range| AddressRange {
                        start: range.start,
                        end: range.end,
                    })
                    .collect());
            }
            offset = header.next as usize;
        }

        // Capability chain exhausted without an IOVA-range capability.
        Err(VfioError::IommuGetCapInfo)
    }

    /// Configures the IOMMU driver appropriate for `iommu_dev`.
    fn set_iommu_from(&mut self, iommu_dev: IommuDevType) -> Result<()> {
        match iommu_dev {
            IommuDevType::CoIommu | IommuDevType::VirtioIommu => {
                // Prefer the ChromeOS type1 variant, falling back to plain
                // Type1V2 on kernels that don't support it.
                self.set_iommu_checked(IommuType::Type1ChromeOS)
                    .or_else(|_| self.set_iommu_checked(IommuType::Type1V2))
            }
            IommuDevType::NoIommu => self.set_iommu_checked(IommuType::Type1V2),
            IommuDevType::PkvmPviommu => self.set_iommu_checked(IommuType::PkvmPviommu),
        }
    }

    /// Returns the group `id`, creating and attaching it on first use.
    ///
    /// When the first group is attached, the container's IOMMU driver is
    /// selected; for `NoIommu` all guest memory is identity-mapped
    /// (GPA -> HVA) so device DMA matches the guest's view. The group is also
    /// registered with the KVM VFIO device.
    fn get_group_with_vm(
        &mut self,
        id: u32,
        vm: &impl Vm,
        iommu_dev: IommuDevType,
    ) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
        if self.groups.is_empty() {
            // The IOMMU driver can only be chosen once, when the first group
            // is attached to the container.
            self.set_iommu_from(iommu_dev)?;
            match iommu_dev {
                IommuDevType::CoIommu | IommuDevType::PkvmPviommu | IommuDevType::VirtioIommu => {}
                IommuDevType::NoIommu => {
                    for region in vm.get_memory().regions() {
                        // SAFETY: guest memory regions remain mapped in this
                        // process for the lifetime of the VM.
                        unsafe {
                            self.vfio_dma_map(
                                region.guest_addr.0,
                                region.size as u64,
                                region.host_addr as u64,
                                true,
                            )
                        }?;
                    }
                }
            }
        }

        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;
        group
            .lock()
            .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;

        self.groups.insert(id, group.clone());

        Ok(group)
    }

    /// Returns the group `id`, creating it on first use, without any VM/KVM
    /// bookkeeping; the first attached group selects the Type1V2 driver.
    fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));

        if self.groups.is_empty() {
            self.set_iommu_checked(IommuType::Type1V2)?;
        }

        self.groups.insert(id, group.clone());
        Ok(group)
    }

    /// Detaches and forgets group `id` once its device count reaches zero.
    /// When `reduce` is set, the group's device count is decremented first.
    fn remove_group(&mut self, id: u32, reduce: bool) {
        let mut remove = false;

        if let Some(group) = self.groups.get(&id) {
            if reduce {
                group.lock().reduce_device_num();
            }
            if group.lock().device_num() == 0 {
                // The KVM VFIO device must exist: it was created when the
                // group was added.
                let kvm_vfio_file = kvm_vfio_file().expect("kvm vfio file isn't created");
                if group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
                    .is_err()
                {
                    // Best-effort: log and continue removing the group.
                    warn!("failing in remove vfio group from kvm device");
                }
                remove = true;
            }
        }

        if remove {
            self.groups.remove(&id);
        }
    }

    /// Duplicates the container descriptor; the caller owns (and must close)
    /// the returned descriptor.
    pub fn clone_as_raw_descriptor(&self) -> Result<RawDescriptor> {
        // SAFETY: dup() is called on a valid descriptor that we own.
        let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
        if raw_descriptor < 0 {
            Err(VfioError::ContainerDupError)
        } else {
            Ok(raw_descriptor)
        }
    }

    /// Returns the ids of all groups currently attached to this container.
    pub fn group_ids(&self) -> Vec<&u32> {
        self.groups.keys().collect()
    }
}
725
impl AsRawDescriptor for VfioContainer {
    // Exposes the underlying container file descriptor.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}
731
/// An open VFIO group (`/dev/vfio/<id>`) attached to a container, with a
/// count of the devices opened from it.
struct VfioGroup {
    group: File,
    // Number of devices currently opened from this group; used by
    // `VfioContainer::remove_group` to decide when to drop the group.
    device_num: u32,
}
736
impl VfioGroup {
    /// Opens `/dev/vfio/<id>`, checks the group is viable, and attaches it
    /// to `container`.
    fn new(container: &VfioContainer, id: u32) -> Result<Self> {
        let group_path = format!("/dev/vfio/{id}");
        let group_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(Path::new(&group_path))
            .map_err(|e| VfioError::OpenGroup(e, group_path))?;

        let mut group_status = vfio_group_status {
            argsz: mem::size_of::<vfio_group_status>() as u32,
            flags: 0,
        };
        // SAFETY: `group_status` stays alive and writable for the duration
        // of the ioctl.
        let mut ret =
            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS, &mut group_status) };
        if ret < 0 {
            return Err(VfioError::GetGroupStatus(get_error()));
        }

        // Only exactly-VIABLE groups are accepted (all devices in the group
        // must be bound to vfio for the group to be usable).
        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
            return Err(VfioError::GroupViable);
        }

        let container_raw_descriptor = container.as_raw_descriptor();
        // SAFETY: both descriptors are valid and `container_raw_descriptor`
        // outlives the ioctl.
        ret = unsafe {
            ioctl_with_ref(
                &group_file,
                VFIO_GROUP_SET_CONTAINER,
                &container_raw_descriptor,
            )
        };
        if ret < 0 {
            return Err(VfioError::GroupSetContainer(get_error()));
        }

        Ok(VfioGroup {
            group: group_file,
            device_num: 0,
        })
    }

    /// Resolves the IOMMU group id of the device at `sysfspath` by reading
    /// its `iommu_group` symlink and parsing the link target's file name.
    fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path
            .read_link()
            .map_err(|e| VfioError::ReadLink(e, uuid_path))?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        Ok(group_id)
    }

    /// Adds this group to, or removes it from, the KVM VFIO device.
    fn kvm_device_set_group(
        &self,
        kvm_vfio_file: &SafeDescriptor,
        ops: KvmVfioGroupOps,
    ) -> Result<()> {
        let group_descriptor = self.as_raw_descriptor();
        // The attribute's `addr` points at the group fd value.
        let group_descriptor_ptr = &group_descriptor as *const i32;
        let vfio_dev_attr = match ops {
            KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
                addr: group_descriptor_ptr as u64,
            },
            KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
                addr: group_descriptor_ptr as u64,
            },
        };

        // SAFETY: `group_descriptor` and `vfio_dev_attr` outlive the ioctl,
        // and `kvm_vfio_file` is a valid KVM VFIO device descriptor.
        if 0 != unsafe {
            ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr)
        } {
            return Err(VfioError::KvmSetDeviceAttr(get_error()));
        }

        Ok(())
    }

    /// Opens the device named `name` within this group and returns its fd.
    fn get_device(&self, name: &str) -> Result<File> {
        let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
        let path_ptr = path.as_ptr();

        // SAFETY: `path` is a NUL-terminated string that outlives the ioctl.
        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD, path_ptr) };
        if ret < 0 {
            return Err(VfioError::GroupGetDeviceFD(get_error()));
        }

        // SAFETY: on success the ioctl returns a new descriptor that we own.
        Ok(unsafe { File::from_raw_descriptor(ret) })
    }

    // Reference counting of devices opened from this group.
    fn add_device_num(&mut self) {
        self.device_num += 1;
    }

    fn reduce_device_num(&mut self) {
        self.device_num -= 1;
    }

    fn device_num(&self) -> u32 {
        self.device_num
    }
}
861
impl AsRawDescriptor for VfioGroup {
    // Exposes the underlying group file descriptor.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}
867
/// Caches `VfioContainer`s so that devices sharing the same IOMMU
/// configuration share a container.
#[derive(Default)]
pub struct VfioContainerManager {
    // Single container shared by all devices that use no vIOMMU.
    no_iommu_container: Option<Arc<Mutex<VfioContainer>>>,

    // One container per IOMMU group for devices behind a virtio-iommu.
    iommu_containers: Vec<Arc<Mutex<VfioContainer>>>,

    // Single container shared by all CoIommu devices.
    coiommu_container: Option<Arc<Mutex<VfioContainer>>>,

    // Single container shared by all pKVM pvIOMMU devices.
    pkvm_iommu_container: Option<Arc<Mutex<VfioContainer>>>,
}
884
885impl VfioContainerManager {
886 pub fn new() -> Self {
887 Self::default()
888 }
889
890 pub fn get_container<P: AsRef<Path>>(
901 &mut self,
902 iommu_type: IommuDevType,
903 sysfspath: Option<P>,
904 ) -> Result<Arc<Mutex<VfioContainer>>> {
905 match iommu_type {
906 IommuDevType::NoIommu => {
907 if let Some(container) = &self.no_iommu_container {
909 Ok(container.clone())
910 } else {
911 let container = Arc::new(Mutex::new(VfioContainer::new()?));
912 self.no_iommu_container = Some(container.clone());
913 Ok(container)
914 }
915 }
916 IommuDevType::VirtioIommu => {
917 let path = sysfspath.ok_or(VfioError::InvalidPath)?;
918 let group_id = VfioGroup::get_group_id(path)?;
919
920 if let Some(container) = self
923 .iommu_containers
924 .iter()
925 .find(|container| container.lock().is_group_set(group_id))
926 {
927 Ok(container.clone())
928 } else {
929 let container = Arc::new(Mutex::new(VfioContainer::new()?));
930 self.iommu_containers.push(container.clone());
931 Ok(container)
932 }
933 }
934 IommuDevType::CoIommu => {
935 if let Some(container) = &self.coiommu_container {
937 Ok(container.clone())
938 } else {
939 let container = Arc::new(Mutex::new(VfioContainer::new()?));
940 self.coiommu_container = Some(container.clone());
941 Ok(container)
942 }
943 }
944 IommuDevType::PkvmPviommu => {
945 if let Some(container) = &self.pkvm_iommu_container {
947 Ok(container.clone())
948 } else {
949 let container = Arc::new(Mutex::new(VfioContainer::new()?));
950 self.pkvm_iommu_container = Some(container.clone());
951 Ok(container)
952 }
953 }
954 }
955 }
956}
957
/// Interrupt mechanisms a VFIO device can use.
pub enum VfioIrqType {
    Intx,
    Msi,
    Msix,
}
964
/// Information about an IRQ of a VFIO device.
pub struct VfioIrq {
    pub flags: u32,
    // IRQ index as used by VFIO_DEVICE_SET_IRQS.
    pub index: u32,
}
970
/// Address of a location within one of a VFIO device's regions.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    // Index of the region.
    pub index: usize,
    // Offset within the region.
    pub addr: u64,
}
979
/// Description of one device region (populated by `get_regions`, defined
/// elsewhere in this file).
#[derive(Debug)]
pub struct VfioRegion {
    // Region flags as reported by the kernel's region-info query.
    flags: u32,
    size: u64,
    // Offset of the region within the device fd.
    offset: u64,
    // Sparse mmap areas within the region, if any.
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // Region capability (type, subtype) pair, if present — NOTE(review):
    // exact semantics set by get_regions, which is outside this view.
    cap_info: Option<(u32, u32)>,
    msix_region_mmappable: bool,
}
995
/// An open VFIO device together with the container/group bookkeeping needed
/// to manage it.
pub struct VfioDevice {
    dev: File,
    name: String,
    container: Arc<Mutex<VfioContainer>>,
    dev_type: VfioDeviceType,
    group_descriptor: RawDescriptor,
    group_id: u32,
    // Device regions, indexed by the kernel's region numbering.
    regions: Vec<VfioRegion>,
    num_irqs: u32,

    // Allocator over the IOMMU's valid IOVA ranges.
    iova_alloc: Arc<Mutex<AddressAllocator>>,
    // Device-tree symbol for this device, if one was supplied.
    dt_symbol: Option<String>,
    // pvIOMMU instance and the vSIDs assigned to this device when using
    // IommuDevType::PkvmPviommu.
    pviommu: Option<(Arc<Mutex<KvmVfioPviommu>>, Vec<u32>)>,
}
1012
1013impl VfioDevice {
    /// Creates a `VfioDevice` for the device at `sysfspath`, attaching its
    /// IOMMU group to `container` and registering it with `vm`.
    ///
    /// For `IommuDevType::PkvmPviommu`, a pvIOMMU instance is also created
    /// and each of the device's stream ids is bound to a distinct randomly
    /// chosen virtual stream id (vSID).
    pub fn new_passthrough<P: AsRef<Path>>(
        sysfspath: &P,
        vm: &impl Vm,
        container: Arc<Mutex<VfioContainer>>,
        iommu_dev: IommuDevType,
        dt_symbol: Option<String>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;

        let group = container
            .lock()
            .get_group_with_vm(group_id, vm, iommu_dev)?;
        // The device name handed to VFIO is the sysfs directory name.
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.lock().get_device(&name)?;
        let (dev_info, dev_type) = Self::get_device_info(&dev)?;
        let regions = Self::get_regions(&dev, dev_info.num_regions)?;
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        let pviommu = if matches!(iommu_dev, IommuDevType::PkvmPviommu) {
            let pviommu = KvmVfioPviommu::new(vm)?;

            // Sample one distinct random vSID per stream id the device
            // exposes, then attach each (index, vSID) pair to the pvIOMMU.
            let vsids_len = KvmVfioPviommu::get_sid_count(vm, &dev)?.try_into().unwrap();
            let max_vsid = u32::MAX.try_into().unwrap();
            let random_vsids = sample(&mut rand::rng(), max_vsid, vsids_len).into_iter();
            let vsids = Vec::from_iter(random_vsids.map(|v| u32::try_from(v).unwrap()));
            for (i, vsid) in vsids.iter().enumerate() {
                pviommu.attach(&dev, i.try_into().unwrap(), *vsid)?;
            }

            Some((Arc::new(Mutex::new(pviommu)), vsids))
        } else {
            None
        };

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol,
            pviommu,
        })
    }
1076
    /// Creates a `VfioDevice` for the device at `sysfspath` using an
    /// existing container, without VM registration or pvIOMMU setup.
    ///
    /// On any failure after the group was created, the group is removed from
    /// the container again so its bookkeeping stays balanced.
    pub fn new<P: AsRef<Path>>(
        sysfspath: &P,
        container: Arc<Mutex<VfioContainer>>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;
        let group = container.lock().get_group(group_id)?;
        // The device name handed to VFIO is the sysfs directory name.
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);

        let dev = match group.lock().get_device(&name) {
            Ok(dev) => dev,
            Err(e) => {
                // Roll back the group added by get_group above.
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let (dev_info, dev_type) = match Self::get_device_info(&dev) {
            Ok(dev_info) => dev_info,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let regions = match Self::get_regions(&dev, dev_info.num_regions) {
            Ok(regions) => regions,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol: None,
            pviommu: None,
        })
    }
1132
    /// Returns the underlying VFIO device file.
    pub fn dev_file(&self) -> &File {
        &self.dev
    }
1137
    /// Returns the device's name (its sysfs directory name).
    pub fn device_name(&self) -> &String {
        &self.name
    }
1142
    /// Returns whether this is a PCI or a platform device.
    pub fn device_type(&self) -> VfioDeviceType {
        self.dev_type
    }
1147
    /// Returns the device-tree symbol associated with this device, if any.
    pub fn dt_symbol(&self) -> Option<&str> {
        self.dt_symbol.as_deref()
    }
1152
1153 pub fn iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])> {
1156 if let Some((ref pviommu, ref ids)) = self.pviommu {
1158 Some((
1159 IommuDevType::PkvmPviommu,
1160 Some(pviommu.lock().id()),
1161 ids.as_ref(),
1162 ))
1163 } else {
1164 None
1165 }
1166 }
1167
1168 pub fn supports_pm_low_power(&self) -> bool {
1170 if self.probe_pm_low_power_entry().is_err() {
1171 false
1172 } else if self.probe_pm_low_power_exit().is_err() {
1173 warn!("VFIO supports LOW_POWER_ENTRY but not LOW_POWER_EXIT: ignoring feature");
1174 false
1175 } else {
1176 true
1177 }
1178 }
1179
    /// Moves the device into its low power state.
    pub fn pm_low_power_enter(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY)
            .map_err(VfioError::VfioPmLowPowerEnter)
    }
1185
    /// Moves the device into its low power state, registering `wakeup_evt`
    /// to be signaled when the device leaves low power on its own.
    pub fn pm_low_power_enter_with_wakeup(&self, wakeup_evt: Event) -> Result<()> {
        let payload = vfio_device_low_power_entry_with_wakeup {
            wakeup_eventfd: wakeup_evt.as_raw_descriptor(),
            reserved: 0,
        };
        let payload_size = mem::size_of::<vfio_device_low_power_entry_with_wakeup>();
        // The feature struct carries the payload in its trailing flexible
        // array.
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(payload_size);
        device_feature[0].argsz = (mem::size_of::<vfio_device_feature>() + payload_size) as u32;
        device_feature[0].flags =
            VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
        // SAFETY: the trailing array was allocated with exactly
        // `payload_size` bytes, and the transmute's source/target sizes are
        // checked at compile time.
        unsafe {
            device_feature[0]
                .data
                .as_mut_slice(payload_size)
                .copy_from_slice(
                    mem::transmute::<vfio_device_low_power_entry_with_wakeup, [u8; 8]>(payload)
                        .as_slice(),
                );
        }
        // SAFETY: `device_feature` stays alive for the duration of the ioctl.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
        if ret < 0 {
            Err(VfioError::VfioPmLowPowerEnter(get_error()))
        } else {
            Ok(())
        }
    }
1217
    /// Moves the device out of its low power state.
    pub fn pm_low_power_exit(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT)
            .map_err(VfioError::VfioPmLowPowerExit)
    }
1223
    // Probes (without acting on) support for low-power entry.
    fn probe_pm_low_power_entry(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_PROBE | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY)
            .map_err(VfioError::VfioProbePmLowPowerEntry)
    }
1228
    // Probes (without acting on) support for low-power exit.
    fn probe_pm_low_power_exit(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_PROBE | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT)
            .map_err(VfioError::VfioProbePmLowPowerExit)
    }
1233
    // Issues VFIO_DEVICE_FEATURE with the given `flags` and no payload.
    fn device_feature(&self, flags: u32) -> result::Result<(), Error> {
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
        device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
        device_feature[0].flags = flags;
        // SAFETY: `device_feature` stays alive for the duration of the ioctl.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
        if ret < 0 {
            Err(get_error())
        } else {
            Ok(())
        }
    }
1247
1248 pub fn acpi_dsm(&self, args: &[u8]) -> Result<Vec<u8>> {
1250 let count = args.len();
1251 let mut dsm = vec_with_array_field::<vfio_acpi_dsm, u8>(count);
1252 dsm[0].argsz = (mem::size_of::<vfio_acpi_dsm>() + mem::size_of_val(args)) as u32;
1253 dsm[0].padding = 0;
1254 unsafe {
1257 dsm[0].args.as_mut_slice(count).clone_from_slice(args);
1258 }
1259 let ret = unsafe { ioctl_with_mut_ref(&self.dev, VFIO_DEVICE_ACPI_DSM, &mut dsm[0]) };
1262 if ret < 0 {
1263 Err(VfioError::VfioAcpiDsm(get_error()))
1264 } else {
1265 let res = unsafe { dsm[0].args.as_slice(count) };
1268 Ok(res.to_vec())
1269 }
1270 }
1271
    /// Registers `acpi_notification_eventfd` to be signaled when the device
    /// raises an ACPI notification on IRQ `index`.
    pub fn acpi_notification_evt_enable(
        &self,
        acpi_notification_eventfd: &Event,
        index: u32,
    ) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let count = 1;

        // vfio_irq_set header followed by a single trailing eventfd (i32).
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = count as u32;

        // SAFETY: `count * u32_size` bytes were allocated behind the flexible
        // array member above.
        let data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        data.copy_from_slice(&acpi_notification_eventfd.as_raw_descriptor().to_ne_bytes()[..]);

        // SAFETY: `irq_set` outlives the ioctl and argsz matches the
        // allocation.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationEnable(get_error()))
        } else {
            Ok(())
        }
    }
1302
    /// Disables ACPI notifications on IRQ `index` (DATA_NONE with count 0
    /// tears down the trigger).
    pub fn acpi_notification_disable(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 0;

        // SAFETY: `irq_set` is a valid header with no payload and outlives the
        // ioctl.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationDisable(get_error()))
        } else {
            Ok(())
        }
    }
1321
    /// Triggers ACPI notification IRQ `index` with the boolean payload `val`,
    /// presumably to inject a test notification — confirm against callers.
    pub fn acpi_notification_test(&self, index: u32, val: u32) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_BOOL | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY: `u32_size` bytes were allocated behind the flexible array
        // member above.
        // NOTE(review): DATA_BOOL payloads are u8 per vector in the VFIO ABI,
        // but a full native-endian u32 is written here — confirm only the low
        // byte is significant on all supported targets.
        let data = unsafe { irq_set[0].data.as_mut_slice(u32_size) };
        data.copy_from_slice(&val.to_ne_bytes()[..]);

        // SAFETY: `irq_set` outlives the ioctl and argsz matches the
        // allocation.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationTest(get_error()))
        } else {
            Ok(())
        }
    }
1348
    /// Assigns one eventfd per IRQ vector, starting at `subindex` within IRQ
    /// `index`. A `None` entry writes -1, which per the VFIO ABI leaves that
    /// vector without an eventfd.
    pub fn irq_enable(
        &self,
        descriptors: &[Option<&Event>],
        index: u32,
        subindex: u32,
    ) -> Result<()> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        // Header followed by `count` trailing i32 eventfds.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = subindex;
        irq_set[0].count = count as u32;

        // SAFETY: `count * u32_size` bytes were allocated behind the flexible
        // array member above.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        // Write each descriptor (or -1) into consecutive 4-byte slots by
        // repeatedly splitting the remaining slice.
        for descriptor in descriptors.iter().take(count) {
            let (left, right) = data.split_at_mut(u32_size);
            match descriptor {
                Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
                None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
            }
            data = right;
        }

        // SAFETY: `irq_set` outlives the ioctl and argsz matches the
        // allocation.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }
1395
1396 pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
1406 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
1407 irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
1408 irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
1409 irq_set[0].index = index;
1410 irq_set[0].start = 0;
1411 irq_set[0].count = 1;
1412
1413 {
1414 let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
1420 descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
1421 }
1422
1423 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1426 if ret < 0 {
1427 Err(VfioError::VfioIrqEnable(get_error()))
1428 } else {
1429 Ok(())
1430 }
1431 }
1432
    /// Disables IRQ `index` entirely (DATA_NONE with count 0 removes all
    /// triggers for the index).
    pub fn irq_disable(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 0;

        // SAFETY: `irq_set` is a valid header with no payload and outlives the
        // ioctl.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqDisable(get_error()))
        } else {
            Ok(())
        }
    }
1451
    /// Unmasks IRQ `index` (first vector only; start=0, count=1).
    pub fn irq_unmask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY: `irq_set` is a valid header with no payload and outlives the
        // ioctl.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqUnmask(get_error()))
        } else {
            Ok(())
        }
    }
1470
    /// Masks IRQ `index` (first vector only; start=0, count=1).
    pub fn irq_mask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY: `irq_set` is a valid header with no payload and outlives the
        // ioctl.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqMask(get_error()))
        } else {
            Ok(())
        }
    }
1489
    /// Queries `VFIO_DEVICE_GET_INFO` and classifies the device as PCI or
    /// platform from its flags, sanity-checking the minimum region/IRQ counts
    /// a PCI device must expose.
    fn get_device_info(device_file: &File) -> Result<(vfio_device_info, VfioDeviceType)> {
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };

        // SAFETY: `dev_info` is valid and writable for the duration of the
        // ioctl, and argsz matches its size.
        let ret = unsafe { ioctl_with_mut_ref(device_file, VFIO_DEVICE_GET_INFO, &mut dev_info) };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        let dev_type = if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
            if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
                || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
            {
                // NOTE(review): the ioctl above succeeded, so get_error()
                // here reports a stale errno unrelated to this check.
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            VfioDeviceType::Pci
        } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
            VfioDeviceType::Platform
        } else {
            return Err(VfioError::UnknownDeviceType(dev_info.flags));
        };

        Ok((dev_info, dev_type))
    }
1524
    /// Queries `VFIO_DEVICE_GET_IRQ_INFO` for every IRQ index of the device,
    /// returning their flags. Only single-vector IRQs are accepted
    /// (`count != 1` is treated as an error).
    pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
        let mut irqs: Vec<VfioIrq> = Vec::new();

        for i in 0..self.num_irqs {
            let argsz = mem::size_of::<vfio_irq_info>() as u32;
            let mut irq_info = vfio_irq_info {
                argsz,
                flags: 0,
                index: i,
                count: 0,
            };
            // SAFETY: `irq_info` is valid and writable for the duration of the
            // ioctl, and argsz matches its size.
            let ret = unsafe {
                ioctl_with_mut_ref(self.device_file(), VFIO_DEVICE_GET_IRQ_INFO, &mut irq_info)
            };
            // NOTE(review): when the ioctl succeeds but count != 1, get_error()
            // reports a stale errno unrelated to this check.
            if ret < 0 || irq_info.count != 1 {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            let irq = VfioIrq {
                flags: irq_info.flags,
                index: irq_info.index,
            };
            irqs.push(irq);
        }
        Ok(irqs)
    }
1556
    /// Enumerates the device's regions via `VFIO_DEVICE_GET_REGION_INFO`,
    /// parsing the capability chain (sparse mmap areas, type/subtype info,
    /// MSI-X mappability) appended by the kernel when extra space is needed.
    #[allow(clippy::cast_ptr_alignment)]
    fn get_regions(dev: &File, num_regions: u32) -> Result<Vec<VfioRegion>> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        for i in 0..num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            // First call: probe with a bare header; the kernel bumps argsz if
            // capability data would not fit.
            // SAFETY: `reg_info` is valid and writable for the duration of the
            // ioctl, and argsz matches its size.
            let ret =
                unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO, &mut reg_info) };
            if ret < 0 {
                // Skip indices the kernel rejects rather than failing the
                // whole enumeration.
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            let mut msix_region_mmappable = false;
            if reg_info.argsz > argsz {
                // Second call with a buffer big enough for the capability
                // chain the kernel advertised.
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // SAFETY: the buffer has argsz bytes allocated (header plus
                // cap_len) and is writable for the duration of the ioctl.
                let ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO,
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                reg_info = region_with_cap[0].region_info;

                // NOTE(review): this `continue` drops the region from the
                // returned vector entirely, so later regions shift index —
                // confirm that is intended rather than pushing a cap-less
                // region.
                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                // Walk the capability chain; each header stores the offset of
                // the next capability (0 terminates). Every access is bounds
                // checked against the kernel-reported argsz.
                // NOTE(review): the u32 additions/multiplications below could
                // wrap for a hostile argsz — confirm kernel-provided sizes are
                // trusted here.
                let mut offset = region_with_cap[0].region_info.cap_offset;
                while offset != 0 {
                    if offset + cap_header_sz > region_info_sz {
                        break;
                    }
                    // SAFETY: offset + header size was just checked to lie
                    // within the allocated region_info buffer.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    // SAFETY: cap_ptr points at a kernel-written capability
                    // header inside the buffer.
                    let cap_header = unsafe { &*(cap_ptr as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz > region_info_sz {
                            break;
                        }
                        // SAFETY: the sparse mmap cap (checked above) fits
                        // inside the buffer.
                        let sparse_mmap =
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_sparse_mmap) };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        // SAFETY: nr_areas trailing entries fit inside the
                        // buffer per the check above.
                        let areas =
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }

                        msix_region_mmappable = true;
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // SAFETY: the type cap (checked above) fits inside the
                        // buffer.
                        let cap_type_info =
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                        // MSIX_MAPPABLE means the whole region may be mmapped.
                        mmaps.push(vfio_region_sparse_mmap_area {
                            offset: 0,
                            size: region_with_cap[0].region_info.size,
                        });
                        msix_region_mmappable = true;
                    }

                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                // No capability chain: a plain MMAP flag means the full region
                // is mappable.
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
                msix_region_mmappable,
            };
            regions.push(region);
        }

        Ok(regions)
    }
1714
1715 pub fn get_region_flags(&self, index: usize) -> u32 {
1722 match self.regions.get(index) {
1723 Some(v) => v.flags,
1724 None => {
1725 warn!("get_region_flags() with invalid index: {}", index);
1726 0
1727 }
1728 }
1729 }
1730
1731 pub fn get_region_offset(&self, index: usize) -> u64 {
1734 match self.regions.get(index) {
1735 Some(v) => v.offset,
1736 None => {
1737 warn!("get_region_offset with invalid index: {}", index);
1738 0
1739 }
1740 }
1741 }
1742
1743 pub fn get_region_size(&self, index: usize) -> u64 {
1746 match self.regions.get(index) {
1747 Some(v) => v.size,
1748 None => {
1749 warn!("get_region_size with invalid index: {}", index);
1750 0
1751 }
1752 }
1753 }
1754
    /// Returns the number of regions this device exposes.
    pub fn get_region_count(&self) -> usize {
        self.regions.len()
    }
1760
1761 pub fn get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area> {
1763 match self.regions.get(index) {
1764 Some(v) => v.mmaps.clone(),
1765 None => {
1766 warn!("get_region_mmap with invalid index: {}", index);
1767 Vec::new()
1768 }
1769 }
1770 }
1771
1772 pub fn get_region_msix_mmappable(&self, index: usize) -> bool {
1775 match self.regions.get(index) {
1776 Some(v) => v.msix_region_mmappable,
1777 None => {
1778 warn!("get_region_msix_mmappable with invalid index: {}", index);
1779 false
1780 }
1781 }
1782 }
1783
1784 pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
1793 for (index, region) in self.regions.iter().enumerate() {
1794 if let Some(cap_info) = ®ion.cap_info {
1795 if cap_info.0 == type_ && cap_info.1 == sub_type {
1796 return Some((index as u32, region.size));
1797 }
1798 }
1799 }
1800
1801 None
1802 }
1803
1804 pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
1807 let region = self
1808 .regions
1809 .get(addr.index)
1810 .ok_or(VfioError::InvalidIndex(addr.index))?;
1811 Ok(region.offset + addr.addr)
1812 }
1813
1814 pub fn region_read(&self, index: usize, buf: &mut [u8], addr: u64) {
1819 let stub: &VfioRegion = self
1820 .regions
1821 .get(index)
1822 .unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {index}"));
1823
1824 let size = buf.len() as u64;
1825 if size > stub.size || addr + size > stub.size {
1826 panic!(
1827 "tried to read VFIO region with invalid arguments: index={index}, addr=0x{addr:x}, size=0x{size:x}"
1828 );
1829 }
1830
1831 self.dev
1832 .read_exact_at(buf, stub.offset + addr)
1833 .unwrap_or_else(|e| {
1834 panic!("failed to read region: index={index}, addr=0x{addr:x}, error={e}")
1835 });
1836 }
1837
    /// Reads a `T` from the location described by `addr`, `offset` additional
    /// bytes in.
    pub fn region_read_from_addr<T: FromBytes>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
        let mut val = mem::MaybeUninit::zeroed();
        // SAFETY: `buf` covers exactly the size_of::<T>() bytes of `val`, and
        // region_read either fills all of them or panics.
        let buf =
            unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
        self.region_read(addr.index, buf, addr.addr + offset);
        // SAFETY: all bytes were initialized above, and `T: FromBytes` means
        // any byte pattern is a valid `T`.
        unsafe { val.assume_init() }
    }
1850
1851 pub fn region_write(&self, index: usize, buf: &[u8], addr: u64) {
1856 let stub: &VfioRegion = self
1857 .regions
1858 .get(index)
1859 .unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {index}"));
1860
1861 let size = buf.len() as u64;
1862 if size > stub.size
1863 || addr + size > stub.size
1864 || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
1865 {
1866 panic!(
1867 "tried to write VFIO region with invalid arguments: index={index}, addr=0x{addr:x}, size=0x{size:x}"
1868 );
1869 }
1870
1871 self.dev
1872 .write_all_at(buf, stub.offset + addr)
1873 .unwrap_or_else(|e| {
1874 panic!("failed to write region: index={index}, addr=0x{addr:x}, error={e}")
1875 });
1876 }
1877
1878 pub fn region_write_to_addr(&self, data: &[u8], addr: &VfioRegionAddr, offset: u64) {
1880 self.region_write(addr.index, data, addr.addr + offset);
1881 }
1882
1883 pub fn keep_rds(&self) -> Vec<RawDescriptor> {
1885 vec![
1886 self.dev.as_raw_descriptor(),
1887 self.group_descriptor,
1888 self.container.lock().as_raw_descriptor(),
1889 ]
1890 }
1891
    /// Maps `[user_addr, user_addr + size)` at IOVA `iova` in the device's
    /// IOMMU domain, forwarding to `VfioContainer::vfio_dma_map`.
    ///
    /// # Safety
    ///
    /// See `VfioContainer::vfio_dma_map`; presumably the caller must keep the
    /// host memory at `user_addr` valid while the mapping exists — confirm
    /// against the container implementation.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        self.container
            .lock()
            .vfio_dma_map(iova, size, user_addr, write_en)
    }
1907
    /// Removes the IOMMU mapping for `[iova, iova + size)` via the container.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        self.container.lock().vfio_dma_unmap(iova, size)
    }
1912
    /// Returns the IOMMU's supported page size mask, queried via the
    /// container.
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        self.container.lock().vfio_get_iommu_page_size_mask()
    }
1916
    /// Allocates an IOVA range of `size` bytes with the given alignment,
    /// tagged with `alloc`, from this device's IOVA allocator.
    pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
        self.iova_alloc
            .lock()
            .allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
            .map_err(VfioError::Resources)
    }
1923
    /// Looks up the IOVA range previously allocated under `alloc`, if any.
    pub fn get_iova(&self, alloc: &Alloc) -> Option<AddressRange> {
        self.iova_alloc.lock().get(alloc).map(|res| res.0)
    }
1927
    /// Releases the IOVA range allocated under `alloc`, returning the freed
    /// range.
    pub fn release_iova(&self, alloc: Alloc) -> Result<AddressRange> {
        self.iova_alloc
            .lock()
            .release(alloc)
            .map_err(VfioError::Resources)
    }
1934
    /// Returns the highest address managed by the IOVA allocator.
    pub fn get_max_addr(&self) -> u64 {
        self.iova_alloc.lock().get_max_addr()
    }
1938
    /// Borrows the underlying VFIO device file.
    pub fn device_file(&self) -> &File {
        &self.dev
    }
1943
    /// Detaches this device's group from the container, releasing the
    /// kernel-side group resources.
    pub fn close(&self) {
        self.container.lock().remove_group(self.group_id, true);
    }
1948}
1949
/// Convenience wrapper for typed reads and writes of a VFIO PCI device's
/// configuration space region.
pub struct VfioPciConfig {
    device: Arc<VfioDevice>,
}
1953
1954impl VfioPciConfig {
1955 pub fn new(device: Arc<VfioDevice>) -> Self {
1956 VfioPciConfig { device }
1957 }
1958
1959 pub fn read_config<T: IntoBytes + FromBytes>(&self, offset: u32) -> T {
1960 let mut config = T::new_zeroed();
1961 self.device.region_read(
1962 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1963 config.as_mut_bytes(),
1964 offset.into(),
1965 );
1966 config
1967 }
1968
1969 pub fn write_config<T: Immutable + IntoBytes>(&self, config: T, offset: u32) {
1970 self.device.region_write(
1971 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1972 config.as_bytes(),
1973 offset.into(),
1974 );
1975 }
1976
1977 pub fn set_bus_master(&self) {
1979 const PCI_COMMAND: u32 = 0x4;
1981 const PCI_COMMAND_MASTER: u16 = 0x4;
1983
1984 let mut cmd: u16 = self.read_config(PCI_COMMAND);
1985
1986 if cmd & PCI_COMMAND_MASTER != 0 {
1987 return;
1988 }
1989
1990 cmd |= PCI_COMMAND_MASTER;
1991
1992 self.write_config(cmd, PCI_COMMAND);
1993 }
1994}
1995
impl AsRawDescriptor for VfioDevice {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        // Expose the raw fd of the underlying VFIO device file.
        self.dev.as_raw_descriptor()
    }
}