1use std::collections::HashMap;
6use std::ffi::CString;
7use std::fs::File;
8use std::fs::OpenOptions;
9use std::io;
10use std::mem;
11use std::os::raw::c_ulong;
12use std::os::unix::prelude::FileExt;
13use std::path::Path;
14use std::path::PathBuf;
15#[cfg(all(target_os = "android", target_arch = "aarch64"))]
16use std::ptr::addr_of_mut;
17use std::result;
18use std::slice;
19use std::sync::Arc;
20use std::sync::OnceLock;
21
22use base::error;
23use base::ioctl;
24use base::ioctl_with_mut_ptr;
25use base::ioctl_with_mut_ref;
26use base::ioctl_with_ptr;
27use base::ioctl_with_ref;
28use base::ioctl_with_val;
29use base::warn;
30use base::AsRawDescriptor;
31use base::Error;
32use base::Event;
33use base::FromRawDescriptor;
34use base::RawDescriptor;
35use base::SafeDescriptor;
36use cfg_if::cfg_if;
37use data_model::vec_with_array_field;
38use hypervisor::DeviceKind;
39use hypervisor::Vm;
40use rand::seq::index::sample;
41use remain::sorted;
42use resources::address_allocator::AddressAllocator;
43use resources::AddressRange;
44use resources::Alloc;
45use resources::Error as ResourcesError;
46use sync::Mutex;
47use thiserror::Error;
48use vfio_sys::vfio::vfio_acpi_dsm;
49use vfio_sys::vfio::VFIO_IRQ_SET_DATA_BOOL;
50use vfio_sys::*;
51use zerocopy::FromBytes;
52use zerocopy::Immutable;
53use zerocopy::IntoBytes;
54
55use crate::IommuDevType;
56
/// Errors that can occur while opening or operating VFIO containers, groups,
/// and devices, or while configuring the KVM-VFIO / pKVM pvIOMMU devices.
///
/// Variants are kept in alphabetical order (enforced by `#[sorted]`).
#[sorted]
#[derive(Error, Debug)]
pub enum VfioError {
    #[error("failed to duplicate VfioContainer")]
    ContainerDupError,
    #[error("failed to set container's IOMMU driver type as {0:?}: {1}")]
    ContainerSetIOMMU(IommuType, Error),
    #[error("failed to create KVM vfio device")]
    CreateVfioKvmDevice,
    #[error("failed to get Group Status: {0}")]
    GetGroupStatus(Error),
    #[error("failed to get vfio device fd: {0}")]
    GroupGetDeviceFD(Error),
    #[error("failed to add vfio group into vfio container: {0}")]
    GroupSetContainer(Error),
    #[error("group is inviable")]
    GroupViable,
    #[error("invalid region index: {0}")]
    InvalidIndex(usize),
    #[error("invalid operation")]
    InvalidOperation,
    #[error("invalid file path")]
    InvalidPath,
    #[error("failed to add guest memory map into iommu table: {0}")]
    IommuDmaMap(Error),
    #[error("failed to remove guest memory map from iommu table: {0}")]
    IommuDmaUnmap(Error),
    #[error("failed to get IOMMU cap info from host")]
    IommuGetCapInfo,
    #[error("failed to get IOMMU info from host: {0}")]
    IommuGetInfo(Error),
    #[error("failed to attach device to pKVM pvIOMMU: {0}")]
    KvmPviommuSetConfig(Error),
    #[error("failed to set KVM vfio device's attribute: {0}")]
    KvmSetDeviceAttr(Error),
    #[error("AddressAllocator is unavailable")]
    NoRescAlloc,
    #[error("failed to open /dev/vfio/vfio container: {0}")]
    OpenContainer(io::Error),
    #[error("failed to open {1} group: {0}")]
    OpenGroup(io::Error, String),
    #[error("failed to read {1} link: {0}")]
    ReadLink(io::Error, PathBuf),
    #[error("resources error: {0}")]
    Resources(ResourcesError),
    #[error("unknown vfio device type (flags: {0:#x})")]
    UnknownDeviceType(u32),
    #[error("failed to call vfio device's ACPI _DSM: {0}")]
    VfioAcpiDsm(Error),
    #[error("failed to disable vfio device's acpi notification: {0}")]
    VfioAcpiNotificationDisable(Error),
    #[error("failed to enable vfio device's acpi notification: {0}")]
    VfioAcpiNotificationEnable(Error),
    #[error("failed to test vfio device's acpi notification: {0}")]
    VfioAcpiNotificationTest(Error),
    #[error(
        "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
    )]
    VfioApiVersion,
    #[error("failed to get vfio device's info or info doesn't match: {0}")]
    VfioDeviceGetInfo(Error),
    #[error("failed to get vfio device's region info: {0}")]
    VfioDeviceGetRegionInfo(Error),
    #[error("container doesn't support IOMMU driver type {0:?}")]
    VfioIommuSupport(IommuType),
    #[error("failed to disable vfio device's irq: {0}")]
    VfioIrqDisable(Error),
    #[error("failed to enable vfio device's irq: {0}")]
    VfioIrqEnable(Error),
    #[error("failed to mask vfio device's irq: {0}")]
    VfioIrqMask(Error),
    #[error("failed to unmask vfio device's irq: {0}")]
    VfioIrqUnmask(Error),
    #[error("failed to enter vfio device's low power state: {0}")]
    VfioPmLowPowerEnter(Error),
    #[error("failed to exit vfio device's low power state: {0}")]
    VfioPmLowPowerExit(Error),
}
135
/// Module-local result type with [`VfioError`] as the error variant.
type Result<T> = std::result::Result<T, VfioError>;

/// Returns the most recent OS error (errno) as a `base::Error`; used to
/// capture the cause immediately after a failed ioctl/syscall.
fn get_error() -> Error {
    Error::last()
}
141
// Process-wide KVM-VFIO device descriptor. `Some(None)` means creation was
// attempted once and failed; we do not retry.
static KVM_VFIO_FILE: OnceLock<Option<SafeDescriptor>> = OnceLock::new();

/// Creates the global KVM-VFIO device on first use and returns it, or `None`
/// if creation failed. Subsequent calls return the cached result (the
/// creation outcome is latched by the `OnceLock`, even on failure).
fn create_kvm_vfio_file(vm: &impl Vm) -> Option<&'static SafeDescriptor> {
    KVM_VFIO_FILE
        .get_or_init(|| vm.create_device(DeviceKind::Vfio).ok())
        .as_ref()
}
149
150fn kvm_vfio_file() -> Option<&'static SafeDescriptor> {
151 match KVM_VFIO_FILE.get() {
152 Some(Some(v)) => Some(v),
153 _ => None,
154 }
155}
156
/// The kind of device exposed through a VFIO device fd, as detected from the
/// flags returned by `VFIO_DEVICE_GET_INFO`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VfioDeviceType {
    Pci,
    Platform,
}
162
/// Operation to perform on the KVM-VFIO device's group list
/// (KVM_DEV_VFIO_GROUP_ADD / KVM_DEV_VFIO_GROUP_DEL).
enum KvmVfioGroupOps {
    Add,
    Delete,
}
167
/// Handle to a pKVM pvIOMMU instance created through the KVM-VFIO device
/// (Android/aarch64 only).
#[derive(Debug)]
pub struct KvmVfioPviommu {
    // File descriptor returned by KVM_DEV_VFIO_PVIOMMU_ATTACH.
    file: File,
}
172
impl KvmVfioPviommu {
    /// Creates a new pvIOMMU instance via the KVM-VFIO device.
    ///
    /// Only implemented for Android/aarch64 (pKVM); panics via
    /// `unimplemented!()` on other targets.
    pub fn new(vm: &impl Vm) -> Result<Self> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let file = Self::ioctl_kvm_dev_vfio_pviommu_attach(vm)?;

                Ok(Self { file })
            } else {
                let _ = vm;
                unimplemented!()
            }
        }
    }

    /// Attaches one of `device`'s stream IDs (selected by `sid_idx`) to this
    /// pvIOMMU under the virtual stream ID `vsid`.
    ///
    /// Only implemented for Android/aarch64 (pKVM); panics elsewhere.
    pub fn attach<T: AsRawDescriptor>(&self, device: &T, sid_idx: u32, vsid: u32) -> Result<()> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                self.ioctl_kvm_pviommu_set_config(device, sid_idx, vsid)
            } else {
                let _ = device;
                let _ = sid_idx;
                let _ = vsid;
                unimplemented!()
            }
        }
    }

    /// Returns an identifier for this pvIOMMU instance.
    ///
    /// The raw file descriptor value is used as the identifier.
    /// NOTE(review): assumes the fd is non-negative (always true for an open
    /// `File`), otherwise `try_into().unwrap()` panics.
    pub fn id(&self) -> u32 {
        let fd = self.as_raw_descriptor();
        fd.try_into().unwrap()
    }

    /// Queries how many stream IDs `device` exposes, via
    /// KVM_DEV_VFIO_PVIOMMU_GET_INFO.
    ///
    /// Only implemented for Android/aarch64 (pKVM); panics elsewhere.
    pub fn get_sid_count<T: AsRawDescriptor>(vm: &impl Vm, device: &T) -> Result<u32> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let info = Self::ioctl_kvm_dev_vfio_pviommu_get_info(vm, device)?;

                Ok(info.nr_sids)
            } else {
                let _ = vm;
                let _ = device;
                unimplemented!()
            }
        }
    }

    /// Issues KVM_SET_DEVICE_ATTR(KVM_DEV_VFIO_PVIOMMU_ATTACH) on the global
    /// KVM-VFIO device. On success the ioctl's return value is a new file
    /// descriptor for the created pvIOMMU instance.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_attach(vm: &impl Vm) -> Result<File> {
        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_ATTACH as u64,
            addr: 0,
        };

        // SAFETY: the ioctl is issued on a valid KVM-VFIO device fd and only
        // reads vfio_dev_attr, which lives for the duration of the call.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            // SAFETY: on success the kernel returned a fresh fd that we now
            // exclusively own.
            Ok(unsafe { File::from_raw_descriptor(ret) })
        }
    }

    /// Issues KVM_PVIOMMU_SET_CONFIG on this pvIOMMU fd to map `device`'s
    /// `sid_idx`-th stream ID to the virtual stream ID `vsid`.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_pviommu_set_config<T: AsRawDescriptor>(
        &self,
        device: &T,
        sid_idx: u32,
        vsid: u32,
    ) -> Result<()> {
        let config = kvm_sys::kvm_vfio_iommu_config {
            size: mem::size_of::<kvm_sys::kvm_vfio_iommu_config>() as u32,
            device_fd: device.as_raw_descriptor(),
            sid_idx,
            vsid,
            __reserved: 0,
        };

        // SAFETY: the ioctl is issued on this pvIOMMU's valid fd and only
        // reads `config`, which lives for the duration of the call.
        let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_PVIOMMU_SET_CONFIG, &config) };

        if ret < 0 {
            Err(VfioError::KvmPviommuSetConfig(get_error()))
        } else {
            Ok(())
        }
    }

    /// Issues KVM_SET_DEVICE_ATTR(KVM_DEV_VFIO_PVIOMMU_GET_INFO) for `device`
    /// and returns the kernel-filled info struct (notably `nr_sids`).
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_get_info<T: AsRawDescriptor>(
        vm: &impl Vm,
        device: &T,
    ) -> Result<kvm_sys::kvm_vfio_iommu_info> {
        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;

        let mut info = kvm_sys::kvm_vfio_iommu_info {
            size: mem::size_of::<kvm_sys::kvm_vfio_iommu_info>() as u32,
            device_fd: device.as_raw_descriptor(),
            nr_sids: 0,
            __reserved: 0,
        };

        // `addr` points at `info`, which the kernel fills in.
        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_GET_INFO as u64,
            addr: addr_of_mut!(info) as usize as u64,
        };

        // SAFETY: the ioctl is issued on a valid KVM-VFIO device fd;
        // vfio_dev_attr and the `info` buffer it points to both outlive the
        // call, and `info` is writable.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            Ok(info)
        }
    }
}
304
impl AsRawDescriptor for KvmVfioPviommu {
    // Exposes the pvIOMMU instance fd returned by the attach ioctl.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.file.as_raw_descriptor()
    }
}
310
/// IOMMU driver types that can be selected on a VFIO container via
/// VFIO_SET_IOMMU. Discriminants are the kernel's extension values.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
    PkvmPviommu = VFIO_PKVM_PVIOMMU,
    // Downstream (ChromeOS kernel) IOMMU type; value is not part of the
    // upstream UAPI headers, hence the literal.
    Type1ChromeOS = 100001,
}
323
/// A VFIO container (`/dev/vfio/vfio`): owns an IOMMU context shared by one
/// or more VFIO groups.
pub struct VfioContainer {
    container: File,
    // Groups attached to this container, keyed by group id.
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
    // Set once VFIO_SET_IOMMU has succeeded; `None` until then.
    iommu_type: Option<IommuType>,
}
330
331fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T>
332where
333 T: FromBytes,
334{
335 Some(T::read_from_prefix(bytes.get(offset..)?).ok()?.0)
336}
337
// Expected VFIO API major version; compared against the value reported by
// VFIO_GET_API_VERSION.
const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
    /// Opens `/dev/vfio/vfio` and wraps it in a new container.
    pub fn new() -> Result<Self> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        Self::new_from_container(container)
    }

    /// Wraps an already-open container file, verifying the kernel's VFIO API
    /// version matches `VFIO_API_VERSION`.
    pub fn new_from_container(container: File) -> Result<Self> {
        // SAFETY: VFIO_GET_API_VERSION takes no argument and only returns a
        // value; `container` is a valid fd.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
            iommu_type: None,
        })
    }

    /// Returns true if the group `group_id` is already attached to this
    /// container.
    fn is_group_set(&self, group_id: u32) -> bool {
        self.groups.contains_key(&group_id)
    }

    /// Asks the kernel (VFIO_CHECK_EXTENSION) whether this container supports
    /// the given IOMMU driver type.
    fn check_extension(&self, val: IommuType) -> bool {
        // SAFETY: the ioctl is issued on a valid container fd and only passes
        // an integer value.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION, val as c_ulong) };
        ret != 0
    }

    /// Selects the IOMMU driver type (VFIO_SET_IOMMU); returns the raw ioctl
    /// result (0 on success).
    fn set_iommu(&mut self, val: IommuType) -> i32 {
        // SAFETY: the ioctl is issued on a valid container fd and only passes
        // an integer value.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU, val as c_ulong) }
    }

    /// Sets the IOMMU driver type after confirming kernel support, and
    /// records the chosen type on success.
    fn set_iommu_checked(&mut self, val: IommuType) -> Result<()> {
        if !self.check_extension(val) {
            Err(VfioError::VfioIommuSupport(val))
        } else if self.set_iommu(val) != 0 {
            Err(VfioError::ContainerSetIOMMU(val, get_error()))
        } else {
            self.iommu_type = Some(val);
            Ok(())
        }
    }

    /// Maps `size` bytes of process memory at `user_addr` to IOVA `iova` in
    /// this container's IOMMU, optionally writable by the device.
    ///
    /// Not supported on a `PkvmPviommu` container (returns
    /// `InvalidOperation`). Panics if called before the IOMMU type is set.
    ///
    /// # Safety
    ///
    /// The caller must ensure `[user_addr, user_addr + size)` is valid,
    /// mapped process memory that remains valid while the DMA mapping exists,
    /// since the kernel/device will access it.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_map called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_map(iova, size, user_addr, write_en)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    /// Type1 implementation of `vfio_dma_map` (VFIO_IOMMU_MAP_DMA).
    ///
    /// Safety contract is inherited from `vfio_dma_map`; the whole fn is
    /// `unsafe`, so the ioctl below needs no separate `unsafe` block.
    unsafe fn vfio_iommu_type1_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA, &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    /// Removes the IOVA mapping `[iova, iova + size)` from this container's
    /// IOMMU. Not supported on a `PkvmPviommu` container. Panics if called
    /// before the IOMMU type is set.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_unmap called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_unmap(iova, size)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    /// Type1 implementation of `vfio_dma_unmap` (VFIO_IOMMU_UNMAP_DMA).
    fn vfio_iommu_type1_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
            ..Default::default()
        };

        // SAFETY: the ioctl is issued on a valid container fd; dma_unmap is a
        // properly initialized struct owned by this frame and writable so the
        // kernel can report the unmapped size.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA, &mut dma_unmap) };
        // The kernel writes back how many bytes were actually unmapped; a
        // partial unmap is treated as failure.
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

    /// Returns the IOMMU's supported page-size bitmask, or 0 for a
    /// `PkvmPviommu` container. Panics if called before the IOMMU type is
    /// set.
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        match self
            .iommu_type
            .expect("vfio_get_iommu_page_size_mask called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iommu_page_size_mask()
            }
            IommuType::PkvmPviommu => Ok(0),
        }
    }

    /// Type1 implementation of the page-size query (VFIO_IOMMU_GET_INFO).
    fn vfio_iommu_type1_get_iommu_page_size_mask(&self) -> Result<u64> {
        let mut iommu_info = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY: the ioctl is issued on a valid container fd; iommu_info is
        // a properly initialized, writable struct owned by this frame.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info) };
        // The page-size field is only meaningful if the kernel set the
        // PGSIZES flag.
        if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        Ok(iommu_info.iova_pgsizes)
    }

    /// Returns the IOVA ranges usable for DMA on this container, or an empty
    /// list for a `PkvmPviommu` container. Panics if called before the IOMMU
    /// type is set.
    pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        match self
            .iommu_type
            .expect("vfio_iommu_iova_get_iova_ranges called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iova_ranges()
            }
            IommuType::PkvmPviommu => Ok(Vec::new()),
        }
    }

    /// Type1 implementation of the IOVA-range query: calls
    /// VFIO_IOMMU_GET_INFO twice (first to learn the required `argsz`, then
    /// with a buffer of that size) and walks the capability chain looking for
    /// VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.
    fn vfio_iommu_type1_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        // First call: discover the total size (argsz) the kernel needs for
        // the info struct plus its capability chain.
        let mut iommu_info_argsz = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY: the ioctl is issued on a valid container fd;
        // iommu_info_argsz is a properly initialized, writable struct owned
        // by this frame.
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info_argsz) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Second call: allocate a buffer large enough for the full reply
        // (header + capability chain) and repeat the query.
        let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
            iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
        );
        iommu_info[0].argsz = iommu_info_argsz.argsz;
        // SAFETY: the ioctl is issued on a valid container fd; the buffer is
        // at least `argsz` bytes and writable.
        let ret =
            unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO, iommu_info.as_mut_ptr()) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        // SAFETY: the allocation backing `iommu_info` is at least `argsz`
        // bytes long, and viewing it as bytes is always valid.
        let info_bytes = unsafe {
            std::slice::from_raw_parts(
                iommu_info.as_ptr() as *const u8,
                iommu_info_argsz.argsz as usize,
            )
        };

        if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Walk the capability chain; each header's `next` is a byte offset
        // from the start of the reply, 0 terminating the chain.
        let mut offset = iommu_info[0].cap_offset as usize;
        while offset != 0 {
            let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset)
                .ok_or(VfioError::IommuGetCapInfo)?;

            if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
                let iova_header =
                    extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
                        info_bytes, offset,
                    )
                    .ok_or(VfioError::IommuGetCapInfo)?;
                let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
                let mut ret = Vec::new();
                for i in 0..iova_header.nr_iovas {
                    ret.push(
                        extract_vfio_struct::<vfio_iova_range>(
                            info_bytes,
                            range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
                        )
                        .ok_or(VfioError::IommuGetCapInfo)?,
                    );
                }
                return Ok(ret
                    .iter()
                    .map(|range| AddressRange {
                        start: range.start,
                        end: range.end,
                    })
                    .collect());
            }
            offset = header.next as usize;
        }

        Err(VfioError::IommuGetCapInfo)
    }

    /// Chooses and sets the container's IOMMU driver type based on the
    /// crosvm-level IOMMU configuration of the device being added.
    fn set_iommu_from(&mut self, iommu_dev: IommuDevType) -> Result<()> {
        match iommu_dev {
            IommuDevType::CoIommu | IommuDevType::VirtioIommu => {
                // Prefer the ChromeOS-specific type; fall back to plain
                // Type1v2 on kernels without it.
                self.set_iommu_checked(IommuType::Type1ChromeOS)
                    .or_else(|_| self.set_iommu_checked(IommuType::Type1V2))
            }
            IommuDevType::NoIommu => self.set_iommu_checked(IommuType::Type1V2),
            IommuDevType::PkvmPviommu => self.set_iommu_checked(IommuType::PkvmPviommu),
        }
    }

    /// Returns the group `id`, creating it and attaching it to this container
    /// if needed. The first group added also configures the container's IOMMU
    /// and, for `NoIommu`, identity-maps (GPA == IOVA) all guest memory. The
    /// group is also registered with the KVM-VFIO device.
    fn get_group_with_vm(
        &mut self,
        id: u32,
        vm: &impl Vm,
        iommu_dev: IommuDevType,
    ) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        // Note: the group must be attached (VfioGroup::new does this) before
        // the IOMMU type can be set on the container.
        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
        if self.groups.is_empty() {
            self.set_iommu_from(iommu_dev)?;
            match iommu_dev {
                IommuDevType::CoIommu | IommuDevType::PkvmPviommu | IommuDevType::VirtioIommu => {}
                IommuDevType::NoIommu => {
                    for region in vm.get_memory().regions() {
                        // SAFETY: the regions come from the VM's guest memory,
                        // which stays mapped for the VM's lifetime.
                        unsafe {
                            self.vfio_dma_map(
                                region.guest_addr.0,
                                region.size as u64,
                                region.host_addr as u64,
                                true,
                            )
                        }?;
                    }
                }
            }
        }

        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;
        group
            .lock()
            .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;

        self.groups.insert(id, group.clone());

        Ok(group)
    }

    /// Returns the group `id`, creating it and attaching it to this container
    /// if needed. VM-less variant of `get_group_with_vm`: always uses Type1v2
    /// and does not register the group with the KVM-VFIO device.
    fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));

        if self.groups.is_empty() {
            self.set_iommu_checked(IommuType::Type1V2)?;
        }

        self.groups.insert(id, group.clone());
        Ok(group)
    }

    /// Drops the group `id` from this container once its device count reaches
    /// zero, first deregistering it from the KVM-VFIO device. If `reduce` is
    /// set, the group's device count is decremented first.
    fn remove_group(&mut self, id: u32, reduce: bool) {
        let mut remove = false;

        if let Some(group) = self.groups.get(&id) {
            if reduce {
                group.lock().reduce_device_num();
            }
            if group.lock().device_num() == 0 {
                let kvm_vfio_file = kvm_vfio_file().expect("kvm vfio file isn't created");
                if group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
                    .is_err()
                {
                    warn!("failing in remove vfio group from kvm device");
                }
                remove = true;
            }
        }

        if remove {
            self.groups.remove(&id);
        }
    }

    /// Duplicates the container fd and returns the raw descriptor; the caller
    /// takes ownership of (and must close) the returned fd.
    pub fn clone_as_raw_descriptor(&self) -> Result<RawDescriptor> {
        // SAFETY: dup() is called on this container's valid fd and does not
        // touch memory.
        let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
        if raw_descriptor < 0 {
            Err(VfioError::ContainerDupError)
        } else {
            Ok(raw_descriptor)
        }
    }

    /// Returns the ids of all groups currently attached to this container.
    pub fn group_ids(&self) -> Vec<&u32> {
        self.groups.keys().collect()
    }
}
721
impl AsRawDescriptor for VfioContainer {
    // Exposes the underlying /dev/vfio/vfio fd (used directly by the ioctl
    // helpers above).
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}
727
/// An open VFIO group (`/dev/vfio/<id>`) attached to a container, with a
/// count of the devices handed out from it.
struct VfioGroup {
    group: File,
    // Number of VfioDevices currently using this group.
    device_num: u32,
}
732
impl VfioGroup {
    /// Opens `/dev/vfio/<id>`, verifies the group is viable, and attaches it
    /// to `container` via VFIO_GROUP_SET_CONTAINER.
    fn new(container: &VfioContainer, id: u32) -> Result<Self> {
        let group_path = format!("/dev/vfio/{id}");
        let group_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(Path::new(&group_path))
            .map_err(|e| VfioError::OpenGroup(e, group_path))?;

        let mut group_status = vfio_group_status {
            argsz: mem::size_of::<vfio_group_status>() as u32,
            flags: 0,
        };
        // SAFETY: the ioctl is issued on a valid group fd; group_status is a
        // properly initialized, writable struct owned by this frame.
        let mut ret =
            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS, &mut group_status) };
        if ret < 0 {
            return Err(VfioError::GetGroupStatus(get_error()));
        }

        // NOTE(review): exact-equality check, so any additional status flag
        // would also be rejected. Before VFIO_GROUP_SET_CONTAINER the kernel
        // is presumably reporting exactly VIABLE here — confirm.
        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
            return Err(VfioError::GroupViable);
        }

        let container_raw_descriptor = container.as_raw_descriptor();
        // SAFETY: the ioctl is issued on a valid group fd and only reads the
        // container fd value, which lives for the duration of the call.
        ret = unsafe {
            ioctl_with_ref(
                &group_file,
                VFIO_GROUP_SET_CONTAINER,
                &container_raw_descriptor,
            )
        };
        if ret < 0 {
            return Err(VfioError::GroupSetContainer(get_error()));
        }

        Ok(VfioGroup {
            group: group_file,
            device_num: 0,
        })
    }

    /// Resolves a device's VFIO group id by reading the
    /// `<sysfspath>/iommu_group` symlink and parsing its final path component
    /// as a number.
    fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path
            .read_link()
            .map_err(|e| VfioError::ReadLink(e, uuid_path))?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        Ok(group_id)
    }

    /// Adds this group to, or removes it from, the KVM-VFIO device's group
    /// list (KVM_DEV_VFIO_GROUP_ADD/DEL). The attribute's `addr` points at
    /// the group fd value.
    fn kvm_device_set_group(
        &self,
        kvm_vfio_file: &SafeDescriptor,
        ops: KvmVfioGroupOps,
    ) -> Result<()> {
        let group_descriptor = self.as_raw_descriptor();
        let group_descriptor_ptr = &group_descriptor as *const i32;
        let vfio_dev_attr = match ops {
            KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
                addr: group_descriptor_ptr as u64,
            },
            KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
                addr: group_descriptor_ptr as u64,
            },
        };

        // SAFETY: the ioctl is issued on a valid KVM-VFIO device fd;
        // vfio_dev_attr and the group fd value it points to both live for the
        // duration of the call.
        if 0 != unsafe {
            ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr)
        } {
            return Err(VfioError::KvmSetDeviceAttr(get_error()));
        }

        Ok(())
    }

    /// Obtains a device fd from this group by name
    /// (VFIO_GROUP_GET_DEVICE_FD); on success the ioctl's return value is the
    /// new fd.
    fn get_device(&self, name: &str) -> Result<File> {
        let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
        let path_ptr = path.as_ptr();

        // SAFETY: the ioctl is issued on a valid group fd and only reads the
        // NUL-terminated name, which outlives the call.
        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD, path_ptr) };
        if ret < 0 {
            return Err(VfioError::GroupGetDeviceFD(get_error()));
        }

        // SAFETY: on success the kernel returned a fresh fd that we now
        // exclusively own.
        Ok(unsafe { File::from_raw_descriptor(ret) })
    }

    /// Records that another device has been handed out from this group.
    fn add_device_num(&mut self) {
        self.device_num += 1;
    }

    /// Records that a device from this group has been released.
    fn reduce_device_num(&mut self) {
        self.device_num -= 1;
    }

    /// Number of devices currently in use from this group.
    fn device_num(&self) -> u32 {
        self.device_num
    }
}
857
impl AsRawDescriptor for VfioGroup {
    // Exposes the underlying /dev/vfio/<id> fd.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}
863
/// Caches VFIO containers so devices that can share an IOMMU context reuse
/// the same container instead of opening a new one each time.
#[derive(Default)]
pub struct VfioContainerManager {
    // Shared container for all no-IOMMU devices.
    no_iommu_container: Option<Arc<Mutex<VfioContainer>>>,

    // One container per virtio-iommu group; looked up by group id.
    iommu_containers: Vec<Arc<Mutex<VfioContainer>>>,

    // Shared container for all CoIOMMU devices.
    coiommu_container: Option<Arc<Mutex<VfioContainer>>>,

    // Shared container for all pKVM pvIOMMU devices.
    pkvm_iommu_container: Option<Arc<Mutex<VfioContainer>>>,
}
880
881impl VfioContainerManager {
882 pub fn new() -> Self {
883 Self::default()
884 }
885
886 pub fn get_container<P: AsRef<Path>>(
897 &mut self,
898 iommu_type: IommuDevType,
899 sysfspath: Option<P>,
900 ) -> Result<Arc<Mutex<VfioContainer>>> {
901 match iommu_type {
902 IommuDevType::NoIommu => {
903 if let Some(container) = &self.no_iommu_container {
905 Ok(container.clone())
906 } else {
907 let container = Arc::new(Mutex::new(VfioContainer::new()?));
908 self.no_iommu_container = Some(container.clone());
909 Ok(container)
910 }
911 }
912 IommuDevType::VirtioIommu => {
913 let path = sysfspath.ok_or(VfioError::InvalidPath)?;
914 let group_id = VfioGroup::get_group_id(path)?;
915
916 if let Some(container) = self
919 .iommu_containers
920 .iter()
921 .find(|container| container.lock().is_group_set(group_id))
922 {
923 Ok(container.clone())
924 } else {
925 let container = Arc::new(Mutex::new(VfioContainer::new()?));
926 self.iommu_containers.push(container.clone());
927 Ok(container)
928 }
929 }
930 IommuDevType::CoIommu => {
931 if let Some(container) = &self.coiommu_container {
933 Ok(container.clone())
934 } else {
935 let container = Arc::new(Mutex::new(VfioContainer::new()?));
936 self.coiommu_container = Some(container.clone());
937 Ok(container)
938 }
939 }
940 IommuDevType::PkvmPviommu => {
941 if let Some(container) = &self.pkvm_iommu_container {
943 Ok(container.clone())
944 } else {
945 let container = Arc::new(Mutex::new(VfioContainer::new()?));
946 self.pkvm_iommu_container = Some(container.clone());
947 Ok(container)
948 }
949 }
950 }
951 }
952}
953
/// Interrupt delivery mechanisms a VFIO device may expose.
pub enum VfioIrqType {
    Intx,
    Msi,
    Msix,
}
960
/// One IRQ descriptor of a VFIO device: the kernel-reported flags and the
/// IRQ index used with VFIO_DEVICE_SET_IRQS.
pub struct VfioIrq {
    pub flags: u32,
    pub index: u32,
}
966
/// An address within a VFIO device region: the region index plus an offset
/// into that region.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    pub index: usize,
    pub addr: u64,
}
975
/// Cached description of one device region, as reported by
/// VFIO_DEVICE_GET_REGION_INFO.
#[derive(Debug)]
pub struct VfioRegion {
    // Region flags (readable/writable/mmap-able) from the kernel.
    flags: u32,
    size: u64,
    // Offset into the device fd at which this region is accessed.
    offset: u64,
    // Sparse-mmap areas within the region, if the kernel reported any.
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // Region capability (type, subtype) pair, if reported.
    cap_info: Option<(u32, u32)>,
    // Whether the MSI-X table region may be mmap-ed.
    msix_region_mmappable: bool,
}
991
/// A VFIO device handed out from a group: wraps the device fd together with
/// the container, group bookkeeping, region/IRQ metadata, and an IOVA
/// allocator built from the container's usable IOVA ranges.
pub struct VfioDevice {
    dev: File,
    name: String,
    container: Arc<Mutex<VfioContainer>>,
    dev_type: VfioDeviceType,
    group_descriptor: RawDescriptor,
    group_id: u32,
    // Region metadata, indexed by region number.
    regions: Vec<VfioRegion>,
    num_irqs: u32,

    iova_alloc: Arc<Mutex<AddressAllocator>>,
    // Device-tree symbol for platform devices, if provided.
    dt_symbol: Option<String>,
    // pKVM pvIOMMU instance and the vSIDs assigned per stream ID, if the
    // device was attached via PkvmPviommu.
    pviommu: Option<(Arc<Mutex<KvmVfioPviommu>>, Vec<u32>)>,
}
1008
1009impl VfioDevice {
    /// Creates a `VfioDevice` for passing through to `vm`.
    ///
    /// Resolves the device's group from `sysfspath`, attaches the group to
    /// `container` (configuring the container's IOMMU on first use), fetches
    /// the device fd and its region/IRQ info, and builds an IOVA allocator
    /// from the container's usable ranges. For `PkvmPviommu` devices, also
    /// creates a pvIOMMU instance and attaches each of the device's stream
    /// IDs under a distinct randomly chosen vSID.
    pub fn new_passthrough<P: AsRef<Path>>(
        sysfspath: &P,
        vm: &impl Vm,
        container: Arc<Mutex<VfioContainer>>,
        iommu_dev: IommuDevType,
        dt_symbol: Option<String>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;

        let group = container
            .lock()
            .get_group_with_vm(group_id, vm, iommu_dev)?;
        // The device name handed to VFIO is the final sysfs path component
        // (e.g. the PCI BDF).
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.lock().get_device(&name)?;
        let (dev_info, dev_type) = Self::get_device_info(&dev)?;
        let regions = Self::get_regions(&dev, dev_info.num_regions)?;
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        let pviommu = if matches!(iommu_dev, IommuDevType::PkvmPviommu) {
            let pviommu = KvmVfioPviommu::new(vm)?;

            let vsids_len = KvmVfioPviommu::get_sid_count(vm, &dev)?.try_into().unwrap();
            let max_vsid = u32::MAX.try_into().unwrap();
            // `sample` draws `vsids_len` distinct values in [0, max_vsid), so
            // every stream ID gets a unique random vSID.
            let random_vsids = sample(&mut rand::thread_rng(), max_vsid, vsids_len).into_iter();
            let vsids = Vec::from_iter(random_vsids.map(|v| u32::try_from(v).unwrap()));
            for (i, vsid) in vsids.iter().enumerate() {
                pviommu.attach(&dev, i.try_into().unwrap(), *vsid)?;
            }

            Some((Arc::new(Mutex::new(pviommu)), vsids))
        } else {
            None
        };

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol,
            pviommu,
        })
    }
1072
    /// Creates a `VfioDevice` without a VM (no KVM registration, no pvIOMMU):
    /// resolves the group from `sysfspath`, fetches the device fd and its
    /// region/IRQ info, and builds an IOVA allocator.
    ///
    /// On failure after the group was created, the group is removed from the
    /// container again so it does not leak.
    /// NOTE(review): the `?` on `vfio_iommu_iova_get_iova_ranges` below does
    /// not perform that cleanup (and `add_device_num` has already run) —
    /// presumably acceptable, but confirm the asymmetry is intentional.
    pub fn new<P: AsRef<Path>>(
        sysfspath: &P,
        container: Arc<Mutex<VfioContainer>>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;
        let group = container.lock().get_group(group_id)?;
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);

        let dev = match group.lock().get_device(&name) {
            Ok(dev) => dev,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let (dev_info, dev_type) = match Self::get_device_info(&dev) {
            Ok(dev_info) => dev_info,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        let regions = match Self::get_regions(&dev, dev_info.num_regions) {
            Ok(regions) => regions,
            Err(e) => {
                container.lock().remove_group(group_id, false);
                return Err(e);
            }
        };
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol: None,
            pviommu: None,
        })
    }
1128
    /// Returns the underlying VFIO device file.
    pub fn dev_file(&self) -> &File {
        &self.dev
    }
1133
    /// Returns the device name, e.g. "0000:02:10.0" for a PCI device.
    pub fn device_name(&self) -> &String {
        &self.name
    }
1138
    /// Returns the kind of device (PCI or platform) this fd refers to.
    pub fn device_type(&self) -> VfioDeviceType {
        self.dev_type
    }
1143
    /// Returns the device-tree symbol for this device, if one was supplied at
    /// construction (platform passthrough).
    pub fn dt_symbol(&self) -> Option<&str> {
        self.dt_symbol.as_deref()
    }
1148
1149 pub fn iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])> {
1152 if let Some((ref pviommu, ref ids)) = self.pviommu {
1154 Some((
1155 IommuDevType::PkvmPviommu,
1156 Some(pviommu.lock().id()),
1157 ids.as_ref(),
1158 ))
1159 } else {
1160 None
1161 }
1162 }
1163
    /// Asks the kernel to put the device into its low power state
    /// (VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY).
    pub fn pm_low_power_enter(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY)
            .map_err(VfioError::VfioPmLowPowerEnter)
    }
1169
    /// Puts the device into its low power state and arms `wakeup_evt` to be
    /// signaled when the device wakes
    /// (VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP).
    pub fn pm_low_power_enter_with_wakeup(&self, wakeup_evt: Event) -> Result<()> {
        let payload = vfio_device_low_power_entry_with_wakeup {
            wakeup_eventfd: wakeup_evt.as_raw_descriptor(),
            reserved: 0,
        };
        let payload_size = mem::size_of::<vfio_device_low_power_entry_with_wakeup>();
        // vfio_device_feature has a trailing flexible array; allocate header
        // plus payload in one buffer.
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(payload_size);
        device_feature[0].argsz = (mem::size_of::<vfio_device_feature>() + payload_size) as u32;
        device_feature[0].flags =
            VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
        // SAFETY: the data slice is exactly `payload_size` bytes inside the
        // allocation above, and the transmute reinterprets the 8-byte payload
        // struct as raw bytes of the same size.
        unsafe {
            device_feature[0]
                .data
                .as_mut_slice(payload_size)
                .copy_from_slice(
                    mem::transmute::<vfio_device_low_power_entry_with_wakeup, [u8; 8]>(payload)
                        .as_slice(),
                );
        }
        // SAFETY: the ioctl is issued on a valid device fd and only reads the
        // buffer built above, which lives for the duration of the call.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
        if ret < 0 {
            Err(VfioError::VfioPmLowPowerEnter(get_error()))
        } else {
            Ok(())
        }
    }
1201
    /// Asks the kernel to bring the device out of its low power state
    /// (VFIO_DEVICE_FEATURE_LOW_POWER_EXIT).
    pub fn pm_low_power_exit(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT)
            .map_err(VfioError::VfioPmLowPowerExit)
    }
1207
    /// Issues a payload-less VFIO_DEVICE_FEATURE ioctl with the given flags,
    /// returning the raw OS error on failure.
    fn device_feature(&self, flags: u32) -> result::Result<(), Error> {
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
        device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
        device_feature[0].flags = flags;
        // SAFETY: the ioctl is issued on a valid device fd and only reads the
        // initialized header built above.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
        if ret < 0 {
            Err(get_error())
        } else {
            Ok(())
        }
    }
1221
    /// Invokes the device's ACPI _DSM method (VFIO_DEVICE_ACPI_DSM) with the
    /// raw argument bytes `args` and returns the result bytes, which the
    /// kernel writes back into the same buffer.
    pub fn acpi_dsm(&self, args: &[u8]) -> Result<Vec<u8>> {
        let count = args.len();
        // vfio_acpi_dsm has a trailing flexible array; allocate header plus
        // `count` argument bytes in one buffer.
        let mut dsm = vec_with_array_field::<vfio_acpi_dsm, u8>(count);
        dsm[0].argsz = (mem::size_of::<vfio_acpi_dsm>() + mem::size_of_val(args)) as u32;
        dsm[0].padding = 0;
        // SAFETY: the args slice is exactly `count` bytes inside the
        // allocation above.
        unsafe {
            dsm[0].args.as_mut_slice(count).clone_from_slice(args);
        }
        // SAFETY: the ioctl is issued on a valid device fd; the buffer built
        // above is writable and lives for the duration of the call.
        let ret = unsafe { ioctl_with_mut_ref(&self.dev, VFIO_DEVICE_ACPI_DSM, &mut dsm[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiDsm(get_error()))
        } else {
            // SAFETY: the same `count` bytes are read back out of the buffer
            // the kernel just filled.
            let res = unsafe { dsm[0].args.as_slice(count) };
            Ok(res.to_vec())
        }
    }
1245
    /// Routes the device's ACPI notifications (IRQ `index`) to the given
    /// eventfd via VFIO_DEVICE_SET_IRQS with an EVENTFD/TRIGGER action.
    pub fn acpi_notification_evt_enable(
        &self,
        acpi_notification_eventfd: &Event,
        index: u32,
    ) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let count = 1;

        // vfio_irq_set has a trailing flexible data array holding one eventfd
        // (as a u32-sized descriptor value) per interrupt.
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = count as u32;

        // SAFETY: the data slice is exactly `count * u32_size` bytes inside
        // the allocation above.
        let data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        data.copy_from_slice(&acpi_notification_eventfd.as_raw_descriptor().to_ne_bytes()[..]);

        // SAFETY: the ioctl is issued on a valid device fd and only reads the
        // buffer built above.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationEnable(get_error()))
        } else {
            Ok(())
        }
    }
1276
1277 pub fn acpi_notification_disable(&self, index: u32) -> Result<()> {
1279 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1280 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1281 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1282 irq_set[0].index = index;
1283 irq_set[0].start = 0;
1284 irq_set[0].count = 0;
1285
1286 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1289 if ret < 0 {
1290 Err(VfioError::VfioAcpiNotificationDisable(get_error()))
1291 } else {
1292 Ok(())
1293 }
1294 }
1295
    /// Triggers IRQ `index` with a boolean payload of `val` — used by the
    /// ACPI-notification test/injection path (see VfioAcpiNotificationTest).
    pub fn acpi_notification_test(&self, index: u32, val: u32) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_BOOL | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY: vec_with_array_field allocated the trailing array with room
        // for one u32.
        let data = unsafe { irq_set[0].data.as_mut_slice(u32_size) };
        // The kernel reads the value in host byte order.
        data.copy_from_slice(&val.to_ne_bytes()[..]);

        // SAFETY: irq_set[0] is fully initialized and argsz covers the payload.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationTest(get_error()))
        } else {
            Ok(())
        }
    }
1322
    /// Enables the IRQ vectors at `index` by attaching one eventfd per
    /// vector, starting at vector `subindex`.
    ///
    /// `descriptors` supplies one entry per vector; a `None` entry passes
    /// fd -1 to the kernel for that vector.
    pub fn irq_enable(
        &self,
        descriptors: &[Option<&Event>],
        index: u32,
        subindex: u32,
    ) -> Result<()> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = subindex;
        irq_set[0].count = count as u32;

        // SAFETY: vec_with_array_field allocated the trailing array with room
        // for `count * u32_size` bytes.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        // Pack one host-endian fd (or -1 for None) per vector, consuming the
        // payload slice u32 by u32.
        for descriptor in descriptors.iter().take(count) {
            let (left, right) = data.split_at_mut(u32_size);
            match descriptor {
                Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
                None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
            }
            data = right;
        }

        // SAFETY: irq_set[0] is fully initialized and argsz covers the payload.
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }
1369
1370 pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
1380 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
1381 irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
1382 irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
1383 irq_set[0].index = index;
1384 irq_set[0].start = 0;
1385 irq_set[0].count = 1;
1386
1387 {
1388 let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
1394 descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
1395 }
1396
1397 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1400 if ret < 0 {
1401 Err(VfioError::VfioIrqEnable(get_error()))
1402 } else {
1403 Ok(())
1404 }
1405 }
1406
1407 pub fn irq_disable(&self, index: u32) -> Result<()> {
1409 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1410 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1411 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1412 irq_set[0].index = index;
1413 irq_set[0].start = 0;
1414 irq_set[0].count = 0;
1415
1416 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1419 if ret < 0 {
1420 Err(VfioError::VfioIrqDisable(get_error()))
1421 } else {
1422 Ok(())
1423 }
1424 }
1425
1426 pub fn irq_unmask(&self, index: u32) -> Result<()> {
1428 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1429 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1430 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
1431 irq_set[0].index = index;
1432 irq_set[0].start = 0;
1433 irq_set[0].count = 1;
1434
1435 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1438 if ret < 0 {
1439 Err(VfioError::VfioIrqUnmask(get_error()))
1440 } else {
1441 Ok(())
1442 }
1443 }
1444
1445 pub fn irq_mask(&self, index: u32) -> Result<()> {
1447 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1448 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1449 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
1450 irq_set[0].index = index;
1451 irq_set[0].start = 0;
1452 irq_set[0].count = 1;
1453
1454 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1457 if ret < 0 {
1458 Err(VfioError::VfioIrqMask(get_error()))
1459 } else {
1460 Ok(())
1461 }
1462 }
1463
    /// Queries VFIO_DEVICE_GET_INFO and classifies the device as PCI or
    /// platform based on the reported flags.
    fn get_device_info(device_file: &File) -> Result<(vfio_device_info, VfioDeviceType)> {
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };

        // SAFETY: dev_info is a fully initialized vfio_device_info owned by us.
        let ret = unsafe { ioctl_with_mut_ref(device_file, VFIO_DEVICE_GET_INFO, &mut dev_info) };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        let dev_type = if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
            // A usable PCI device must expose at least the config region and
            // IRQ indexes up through MSI-X.
            // NOTE(review): get_error() here returns the last errno, which
            // this failed check did not set — the wrapped error may be stale;
            // confirm whether a dedicated error variant would be clearer.
            if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
                || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
            {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            VfioDeviceType::Pci
        } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
            VfioDeviceType::Platform
        } else {
            return Err(VfioError::UnknownDeviceType(dev_info.flags));
        };

        Ok((dev_info, dev_type))
    }
1498
    /// Queries VFIO_DEVICE_GET_IRQ_INFO for every IRQ index of the device.
    ///
    /// Only single-vector IRQs are accepted: any index reporting
    /// `count != 1` fails the whole call.
    pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
        let mut irqs: Vec<VfioIrq> = Vec::new();

        for i in 0..self.num_irqs {
            let argsz = mem::size_of::<vfio_irq_info>() as u32;
            let mut irq_info = vfio_irq_info {
                argsz,
                flags: 0,
                index: i,
                count: 0,
            };
            // SAFETY: irq_info is a fully initialized vfio_irq_info owned by
            // us.
            let ret = unsafe {
                ioctl_with_mut_ref(self.device_file(), VFIO_DEVICE_GET_IRQ_INFO, &mut irq_info)
            };
            // NOTE(review): when count != 1 but ret >= 0, get_error() reports
            // a stale errno — confirm the intended error value.
            if ret < 0 || irq_info.count != 1 {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            let irq = VfioIrq {
                flags: irq_info.flags,
                index: irq_info.index,
            };
            irqs.push(irq);
        }
        Ok(irqs)
    }
1530
    /// Queries region info for each of `num_regions` regions of `dev`,
    /// including any sparse-mmap / region-type / MSI-X-mappable capability
    /// chains the kernel reports, and returns one `VfioRegion` per region.
    #[allow(clippy::cast_ptr_alignment)]
    fn get_regions(dev: &File, num_regions: u32) -> Result<Vec<VfioRegion>> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        for i in 0..num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            // SAFETY: reg_info is a fully initialized vfio_region_info owned
            // by us.
            let ret =
                unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO, &mut reg_info) };
            if ret < 0 {
                // This region index is not implemented; skip it.
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            let mut msix_region_mmappable = false;
            if reg_info.argsz > argsz {
                // The kernel reported a larger argsz, meaning capability data
                // follows the header: re-issue the ioctl with a buffer big
                // enough for the whole capability chain.
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // SAFETY: region_with_cap[0] is fully initialized and its
                // argsz covers the cap_len trailing bytes allocated above.
                let ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO,
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                reg_info = region_with_cap[0].region_info;

                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    // No capability chain despite the larger argsz; this
                    // region is skipped entirely (not pushed to `regions`).
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                // Walk the capability chain: each header's `next` is a byte
                // offset from the start of the region info; 0 terminates.
                let mut offset = region_with_cap[0].region_info.cap_offset;
                while offset != 0 {
                    // Bounds-check each capability before dereferencing it.
                    if offset + cap_header_sz > region_info_sz {
                        break;
                    }
                    // SAFETY: offset + cap_header_sz was checked to stay
                    // within the region_info_sz bytes of the buffer.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    // SAFETY: cap_ptr points at an in-bounds header written by
                    // the kernel.
                    let cap_header = unsafe { &*(cap_ptr as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz > region_info_sz {
                            break;
                        }
                        // SAFETY: the sparse-mmap capability fits in the
                        // buffer (checked above).
                        let sparse_mmap =
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_sparse_mmap) };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        // SAFETY: all nr_areas entries fit in the buffer
                        // (checked above).
                        let areas =
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }

                        msix_region_mmappable = true;
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // SAFETY: the region-type capability fits in the
                        // buffer (checked above).
                        let cap_type_info =
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                        // The whole region may be mmapped even though it
                        // contains the MSI-X table.
                        mmaps.push(vfio_region_sparse_mmap_area {
                            offset: 0,
                            size: region_with_cap[0].region_info.size,
                        });
                        msix_region_mmappable = true;
                    }

                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                // No capability chain: a plain MMAP-able region maps in full.
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
                msix_region_mmappable,
            };
            regions.push(region);
        }

        Ok(regions)
    }
1688
1689 pub fn get_region_flags(&self, index: usize) -> u32 {
1696 match self.regions.get(index) {
1697 Some(v) => v.flags,
1698 None => {
1699 warn!("get_region_flags() with invalid index: {}", index);
1700 0
1701 }
1702 }
1703 }
1704
1705 pub fn get_region_offset(&self, index: usize) -> u64 {
1708 match self.regions.get(index) {
1709 Some(v) => v.offset,
1710 None => {
1711 warn!("get_region_offset with invalid index: {}", index);
1712 0
1713 }
1714 }
1715 }
1716
1717 pub fn get_region_size(&self, index: usize) -> u64 {
1720 match self.regions.get(index) {
1721 Some(v) => v.size,
1722 None => {
1723 warn!("get_region_size with invalid index: {}", index);
1724 0
1725 }
1726 }
1727 }
1728
    /// Returns the number of regions discovered on this device.
    pub fn get_region_count(&self) -> usize {
        self.regions.len()
    }
1734
1735 pub fn get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area> {
1737 match self.regions.get(index) {
1738 Some(v) => v.mmaps.clone(),
1739 None => {
1740 warn!("get_region_mmap with invalid index: {}", index);
1741 Vec::new()
1742 }
1743 }
1744 }
1745
1746 pub fn get_region_msix_mmappable(&self, index: usize) -> bool {
1749 match self.regions.get(index) {
1750 Some(v) => v.msix_region_mmappable,
1751 None => {
1752 warn!("get_region_msix_mmappable with invalid index: {}", index);
1753 false
1754 }
1755 }
1756 }
1757
1758 pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
1767 for (index, region) in self.regions.iter().enumerate() {
1768 if let Some(cap_info) = ®ion.cap_info {
1769 if cap_info.0 == type_ && cap_info.1 == sub_type {
1770 return Some((index as u32, region.size));
1771 }
1772 }
1773 }
1774
1775 None
1776 }
1777
1778 pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
1781 let region = self
1782 .regions
1783 .get(addr.index)
1784 .ok_or(VfioError::InvalidIndex(addr.index))?;
1785 Ok(region.offset + addr.addr)
1786 }
1787
1788 pub fn region_read(&self, index: usize, buf: &mut [u8], addr: u64) {
1793 let stub: &VfioRegion = self
1794 .regions
1795 .get(index)
1796 .unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {index}"));
1797
1798 let size = buf.len() as u64;
1799 if size > stub.size || addr + size > stub.size {
1800 panic!(
1801 "tried to read VFIO region with invalid arguments: index={index}, addr=0x{addr:x}, size=0x{size:x}"
1802 );
1803 }
1804
1805 self.dev
1806 .read_exact_at(buf, stub.offset + addr)
1807 .unwrap_or_else(|e| {
1808 panic!("failed to read region: index={index}, addr=0x{addr:x}, error={e}")
1809 });
1810 }
1811
    /// Reads a `T` from the region described by `addr`, at `addr.addr`
    /// plus `offset` additional bytes.
    pub fn region_read_from_addr<T: FromBytes>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
        let mut val = mem::MaybeUninit::zeroed();
        // SAFETY: the slice covers exactly the size_of::<T>() bytes backing
        // `val`, which region_read fully overwrites (or panics).
        let buf =
            unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
        self.region_read(addr.index, buf, addr.addr + offset);
        // SAFETY: T: FromBytes, so any initialized byte pattern is a valid T,
        // and the bytes were fully written by region_read above (it starts
        // zeroed in any case).
        unsafe { val.assume_init() }
    }
1824
1825 pub fn region_write(&self, index: usize, buf: &[u8], addr: u64) {
1830 let stub: &VfioRegion = self
1831 .regions
1832 .get(index)
1833 .unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {index}"));
1834
1835 let size = buf.len() as u64;
1836 if size > stub.size
1837 || addr + size > stub.size
1838 || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
1839 {
1840 panic!(
1841 "tried to write VFIO region with invalid arguments: index={index}, addr=0x{addr:x}, size=0x{size:x}"
1842 );
1843 }
1844
1845 self.dev
1846 .write_all_at(buf, stub.offset + addr)
1847 .unwrap_or_else(|e| {
1848 panic!("failed to write region: index={index}, addr=0x{addr:x}, error={e}")
1849 });
1850 }
1851
    /// Writes `data` into the region described by `addr`, at `addr.addr`
    /// plus `offset` additional bytes.
    pub fn region_write_to_addr(&self, data: &[u8], addr: &VfioRegionAddr, offset: u64) {
        self.region_write(addr.index, data, addr.addr + offset);
    }
1856
1857 pub fn keep_rds(&self) -> Vec<RawDescriptor> {
1859 vec![
1860 self.dev.as_raw_descriptor(),
1861 self.group_descriptor,
1862 self.container.lock().as_raw_descriptor(),
1863 ]
1864 }
1865
    /// Maps `[user_addr, user_addr + size)` at IOVA `iova` through the shared
    /// container's IOMMU mapping.
    ///
    /// # Safety
    ///
    /// Delegates to `VfioContainer::vfio_dma_map`; the caller presumably must
    /// keep `[user_addr, user_addr + size)` valid process memory for the
    /// lifetime of the mapping — confirm against the container's contract.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        self.container
            .lock()
            .vfio_dma_map(iova, size, user_addr, write_en)
    }
1881
    /// Removes the IOMMU mapping for `[iova, iova + size)` via the shared
    /// container.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        self.container.lock().vfio_dma_unmap(iova, size)
    }
1886
    /// Returns the IOMMU page size mask reported by the shared container.
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        self.container.lock().vfio_get_iommu_page_size_mask()
    }
1890
    /// Allocates an IOVA range of `size` bytes, aligned to `align_size`, for
    /// `alloc` from this device's IOVA allocator.
    pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
        self.iova_alloc
            .lock()
            .allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
            .map_err(VfioError::Resources)
    }
1897
    /// Returns the IOVA range previously allocated for `alloc`, if any.
    pub fn get_iova(&self, alloc: &Alloc) -> Option<AddressRange> {
        self.iova_alloc.lock().get(alloc).map(|res| res.0)
    }
1901
    /// Releases the IOVA range held by `alloc` back to the allocator and
    /// returns the freed range.
    pub fn release_iova(&self, alloc: Alloc) -> Result<AddressRange> {
        self.iova_alloc
            .lock()
            .release(alloc)
            .map_err(VfioError::Resources)
    }
1908
    /// Returns the highest address managed by this device's IOVA allocator.
    pub fn get_max_addr(&self) -> u64 {
        self.iova_alloc.lock().get_max_addr()
    }
1912
    /// Returns a reference to the underlying VFIO device file.
    pub fn device_file(&self) -> &File {
        &self.dev
    }
1917
    /// Removes this device's group from the shared container.
    pub fn close(&self) {
        self.container.lock().remove_group(self.group_id, true);
    }
1922}
1923
/// Typed accessor for the PCI configuration space of a VFIO PCI device.
pub struct VfioPciConfig {
    device: Arc<VfioDevice>,
}
1927
1928impl VfioPciConfig {
1929 pub fn new(device: Arc<VfioDevice>) -> Self {
1930 VfioPciConfig { device }
1931 }
1932
1933 pub fn read_config<T: IntoBytes + FromBytes>(&self, offset: u32) -> T {
1934 let mut config = T::new_zeroed();
1935 self.device.region_read(
1936 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1937 config.as_mut_bytes(),
1938 offset.into(),
1939 );
1940 config
1941 }
1942
1943 pub fn write_config<T: Immutable + IntoBytes>(&self, config: T, offset: u32) {
1944 self.device.region_write(
1945 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1946 config.as_bytes(),
1947 offset.into(),
1948 );
1949 }
1950
1951 pub fn set_bus_master(&self) {
1953 const PCI_COMMAND: u32 = 0x4;
1955 const PCI_COMMAND_MASTER: u16 = 0x4;
1957
1958 let mut cmd: u16 = self.read_config(PCI_COMMAND);
1959
1960 if cmd & PCI_COMMAND_MASTER != 0 {
1961 return;
1962 }
1963
1964 cmd |= PCI_COMMAND_MASTER;
1965
1966 self.write_config(cmd, PCI_COMMAND);
1967 }
1968}
1969
impl AsRawDescriptor for VfioDevice {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        // Expose the underlying VFIO device file descriptor.
        self.dev.as_raw_descriptor()
    }
}