1#![cfg(target_arch = "aarch64")]
8
9use std::collections::BTreeMap;
10use std::fs::File;
11use std::io;
12use std::path::PathBuf;
13use std::sync::atomic::AtomicU32;
14use std::sync::mpsc;
15use std::sync::Arc;
16
17#[cfg(feature = "gdb")]
18use aarch64_sys_reg::AArch64SysRegId;
19use arch::get_serial_cmdline;
20use arch::CpuSet;
21use arch::DevicePowerManagerConfig;
22use arch::DtbOverlay;
23use arch::FdtPosition;
24use arch::GetSerialCmdlineError;
25use arch::MemoryRegionConfig;
26use arch::RunnableLinuxVm;
27use arch::VcpuAffinity;
28use arch::VmComponents;
29use arch::VmImage;
30use base::warn;
31use base::MemoryMappingBuilder;
32use base::SendTube;
33use base::Tube;
34use devices::serial_device::SerialHardware;
35use devices::serial_device::SerialParameters;
36use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
37use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
38use devices::Bus;
39use devices::BusDeviceObj;
40use devices::BusError;
41use devices::BusType;
42use devices::DevicePowerManager;
43use devices::HvcDevicePowerManager;
44use devices::IrqChip;
45use devices::IrqChipAArch64;
46use devices::IrqEventSource;
47use devices::PciAddress;
48use devices::PciConfigMmio;
49use devices::PciDevice;
50use devices::PciRootCommand;
51use devices::Serial;
52use devices::SmcccTrng;
53#[cfg(any(target_os = "android", target_os = "linux"))]
54use devices::VirtCpufreq;
55#[cfg(any(target_os = "android", target_os = "linux"))]
56use devices::VirtCpufreqV2;
57use fdt::PciAddressSpace;
58#[cfg(feature = "gdb")]
59use gdbstub::arch::Arch;
60#[cfg(feature = "gdb")]
61use gdbstub_arch::aarch64::reg::id::AArch64RegId;
62#[cfg(feature = "gdb")]
63use gdbstub_arch::aarch64::AArch64 as GdbArch;
64use hypervisor::CpuConfigAArch64;
65use hypervisor::DeviceKind;
66use hypervisor::Hypervisor;
67use hypervisor::HypervisorCap;
68use hypervisor::MemCacheType;
69use hypervisor::ProtectionType;
70use hypervisor::VcpuAArch64;
71use hypervisor::VcpuFeature;
72use hypervisor::VcpuInitAArch64;
73use hypervisor::VcpuRegAArch64;
74use hypervisor::Vm;
75use hypervisor::VmAArch64;
76use hypervisor::VmCap;
77#[cfg(windows)]
78use jail::FakeMinijailStub as Minijail;
79use kernel_loader::LoadedKernel;
80#[cfg(any(target_os = "android", target_os = "linux"))]
81use minijail::Minijail;
82use remain::sorted;
83use resources::address_allocator::AddressAllocator;
84use resources::AddressRange;
85use resources::MmioType;
86use resources::SystemAllocator;
87use resources::SystemAllocatorConfig;
88use sync::Condvar;
89use sync::Mutex;
90use thiserror::Error;
91use vm_control::BatControl;
92use vm_control::BatteryType;
93use vm_memory::GuestAddress;
94use vm_memory::GuestMemory;
95use vm_memory::GuestMemoryError;
96use vm_memory::MemoryRegionOptions;
97use vm_memory::MemoryRegionPurpose;
98
99mod fdt;
100
// Maximum amount of guest memory reserved for the flattened device tree blob.
const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
// Alignment (2 MiB) applied when placing the FDT in guest memory.
const AARCH64_FDT_ALIGN: u64 = 0x200000;
// Alignment (16 MiB) applied to the initrd load address.
const AARCH64_INITRD_ALIGN: u64 = 0x1000000;

// Maximum kernel command line size, including the trailing NUL
// (the cmdline is truncated to AARCH64_CMDLINE_MAX_SIZE - 1 below).
const AARCH64_CMDLINE_MAX_SIZE: usize = 2048;

// Size of the GIC distributor MMIO region.
const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
// Size of the GIC CPU interface MMIO region.
const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;

// Guest physical address where main RAM starts.
const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
// Size of the platform MMIO window placed directly after the end of guest RAM.
const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x40000000;

// Maximum size of the protected VM firmware (pvmfw) image.
const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
// pvmfw occupies the region immediately below the start of RAM.
const AARCH64_PROTECTED_VM_FW_START: u64 =
    AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;

// Stolen-time (pvtime) shared memory: IPA window mapped into the guest and
// the per-vCPU record size within it.
const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
const AARCH64_PVTIME_IPA_START: u64 = 0x1ff0000;
const AARCH64_PVTIME_SIZE: u64 = 64;

// GIC distributor sits just below 0x4000_0000, with the CPU interface
// directly below the distributor.
const AARCH64_GIC_DIST_BASE: u64 = 0x40000000 - AARCH64_GIC_DIST_SIZE;
const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
// Per-vCPU GIC redistributor region size (GICv3).
const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;
// GIC ITS (Interrupt Translation Service) MMIO region.
const AARCH64_GIC_ITS_BASE: u64 = 0x40000000;
const AARCH64_GIC_ITS_SIZE: u64 = 0x20000;

// PSTATE/SPSR bits used to build the initial processor state:
// EL1h mode with FIQ, IRQ, SError and Debug exceptions masked.
const PSR_MODE_EL1H: u64 = 0x00000005;
const PSR_F_BIT: u64 = 0x00000040;
const PSR_I_BIT: u64 = 0x00000080;
const PSR_A_BIT: u64 = 0x00000100;
const PSR_D_BIT: u64 = 0x00000200;

// Emulated UART input clock (classic 1.8432 MHz UART crystal frequency).
const AARCH64_SERIAL_SPEED: u32 = 1843200;
// IRQ numbers for serial ports 1/3 (shared) and 2/4 (shared).
const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
const AARCH64_SERIAL_2_4_IRQ: u32 = 2;

// MMIO region and IRQ for the real-time clock device.
const AARCH64_RTC_ADDR: u64 = 0x2000;
const AARCH64_RTC_SIZE: u64 = 0x1000;
const AARCH64_RTC_IRQ: u32 = 1;

// IRQ for the goldfish battery device.
const AARCH64_BAT_IRQ: u32 = 3;

// MMIO region for the VM watchdog device.
const AARCH64_VMWDT_ADDR: u64 = 0x3000;
const AARCH64_VMWDT_SIZE: u64 = 0x1000;

// Default PCI CAM (config access mechanism) region; only the default size is
// accepted (see arch_memory_layout).
const AARCH64_PCI_CAM_BASE_DEFAULT: u64 = 0x10000;
const AARCH64_PCI_CAM_SIZE_DEFAULT: u64 = 0x1000000;
// Default PCI low-MMIO region handed to the system allocator.
const AARCH64_PCI_MEM_BASE_DEFAULT: u64 = 0x2000000;
const AARCH64_PCI_MEM_SIZE_DEFAULT: u64 = 0x2000000;
// First IRQ number available for dynamic allocation by the system allocator.
const AARCH64_IRQ_BASE: u32 = 4;

// MMIO window for the virtual cpufreq devices: one slot of AARCH64_VIRTFREQ_SIZE
// (or AARCH64_VIRTFREQ_V2_SIZE for v2) per vCPU, bounded by AARCH64_VIRTFREQ_MAXSIZE.
const AARCH64_VIRTFREQ_BASE: u64 = 0x1040000;
const AARCH64_VIRTFREQ_SIZE: u64 = 0x8;
const AARCH64_VIRTFREQ_MAXSIZE: u64 = 0x10000;
const AARCH64_VIRTFREQ_V2_SIZE: u64 = 0x1000;

// PMU interrupt number; 16 is added when programming the vCPU (see init_pmu
// call in build_vm), presumably to convert to a PPI INTID -- TODO confirm.
const AARCH64_PMU_IRQ: u32 = 7;

// IRQ for the VM watchdog device.
const AARCH64_VMWDT_IRQ: u32 = 15;
183
/// The guest payload loaded into memory: either a raw BIOS image or a Linux
/// kernel loaded by `kernel_loader`.
enum PayloadType {
    Bios {
        // Guest address the BIOS was loaded at (also used as the entry point).
        entry: GuestAddress,
        // Size in bytes of the loaded BIOS image.
        image_size: u64,
    },
    Kernel(LoadedKernel),
}
191
192impl PayloadType {
193 fn entry(&self) -> GuestAddress {
194 match self {
195 Self::Bios {
196 entry,
197 image_size: _,
198 } => *entry,
199 Self::Kernel(k) => k.entry,
200 }
201 }
202
203 fn size(&self) -> u64 {
204 match self {
205 Self::Bios {
206 entry: _,
207 image_size,
208 } => *image_size,
209 Self::Kernel(k) => k.size,
210 }
211 }
212
213 fn address_range(&self) -> AddressRange {
214 match self {
215 Self::Bios { entry, image_size } => {
216 AddressRange::from_start_and_size(entry.offset(), *image_size)
217 .expect("invalid BIOS address range")
218 }
219 Self::Kernel(k) => {
220 AddressRange::from_start_and_size(k.entry.offset(), k.size)
223 .expect("invalid kernel address range")
224 }
225 }
226 }
227}
228
229fn get_swiotlb_addr(
232 memory_size: u64,
233 swiotlb_size: u64,
234 hypervisor: &(impl Hypervisor + ?Sized),
235) -> Option<GuestAddress> {
236 if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
237 Some(GuestAddress(
238 AARCH64_PHYS_MEM_START + memory_size - swiotlb_size,
239 ))
240 } else {
241 None
242 }
243}
244
/// Errors that can occur while building or configuring an AArch64 VM.
///
/// Variants are kept in alphabetical order, enforced by `#[sorted]`.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    #[error("failed to allocate IRQ number")]
    AllocateIrq,
    #[error("bios could not be loaded: {0}")]
    BiosLoadFailure(arch::LoadImageError),
    #[error("failed to build arm pvtime memory: {0}")]
    BuildPvtimeError(base::MmapError),
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("bad PCI CAM configuration: {0}")]
    ConfigurePciCam(String),
    #[error("bad PCI mem configuration: {0}")]
    ConfigurePciMem(String),
    #[error("failed to configure CPU Frequencies: {0}")]
    CpuFrequencies(base::Error),
    #[error("failed to configure CPU topology: {0}")]
    CpuTopology(base::Error),
    #[error("unable to create battery devices: {0}")]
    CreateBatDevices(arch::DeviceRegistrationError),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create GIC: {0}")]
    CreateGICFailure(base::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create tube: {0}")]
    CreateTube(base::TubeError),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("unable to create vm watchdog timer device: {0}")]
    CreateVmwdtDevice(anyhow::Error),
    #[error("custom pVM firmware could not be loaded: {0}")]
    CustomPvmFwLoadFailure(arch::LoadImageError),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("error enabling hypercalls base={0:#x}, count={1}: {2}")]
    EnableHypercalls(u64, usize, base::Error),
    #[error("failed to enable singlestep execution: {0}")]
    EnableSinglestep(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get HW breakpoint count: {0}")]
    GetMaxHwBreakPoint(base::Error),
    #[error("failed to get PSCI version: {0}")]
    GetPsciVersion(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("failed to initialize arm pvtime: {0}")]
    InitPvtimeError(base::Error),
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("failed to initialize virtual machine {0}")]
    InitVmError(anyhow::Error),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(kernel_loader::Error),
    #[error("error loading Kernel from Elf image: {0}")]
    LoadElfKernel(kernel_loader::Error),
    #[error("failed to map arm pvtime memory: {0}")]
    MapPvtimeError(base::Error),
    #[error("missing power manager for assigned devices")]
    MissingDevicePowerManager,
    #[error("pVM firmware could not be loaded: {0}")]
    PvmFwLoadFailure(base::Error),
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("error reading guest memory: {0}")]
    ReadGuestMemory(vm_memory::GuestMemoryError),
    #[error("error reading CPU register: {0}")]
    ReadReg(base::Error),
    #[error("error reading CPU registers: {0}")]
    ReadRegs(base::Error),
    #[error("error registering hypercalls base={0:#x}, count={1}: {2}")]
    RegisterHypercalls(u64, usize, BusError),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual cpufreq device: {0}")]
    RegisterVirtCpufreq(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set a hardware breakpoint: {0}")]
    SetHwBreakpoint(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("failed to set up guest memory: {0}")]
    SetupGuestMemory(GuestMemoryError),
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
    #[error("error writing guest memory: {0}")]
    WriteGuestMemory(GuestMemoryError),
    #[error("error writing CPU register: {0}")]
    WriteReg(base::Error),
    #[error("error writing CPU registers: {0}")]
    WriteRegs(base::Error),
}
359
360pub type Result<T> = std::result::Result<T, Error>;
361
/// Loads a kernel image into guest memory at `kernel_start`, trying each
/// supported format in turn: ELF first, then an LZ4-compressed ARM64 image,
/// and finally a raw ARM64 kernel image.
///
/// Errors from the first two loaders are intentionally discarded so the next
/// format can be attempted; only the final (raw image) loader's error is
/// reported to the caller via `Error::KernelLoadFailure`.
fn load_kernel(
    guest_mem: &GuestMemory,
    kernel_start: GuestAddress,
    mut kernel_image: &mut File,
) -> Result<LoadedKernel> {
    // `&mut kernel_image` reborrows the file handle so it can be reused by
    // the subsequent loaders if this attempt fails.
    if let Ok(elf_kernel) = kernel_loader::load_elf(
        guest_mem,
        kernel_start,
        &mut kernel_image,
        AARCH64_PHYS_MEM_START,
    ) {
        return Ok(elf_kernel);
    }

    if let Ok(lz4_kernel) =
        kernel_loader::load_arm64_kernel_lz4(guest_mem, kernel_start, &mut kernel_image)
    {
        return Ok(lz4_kernel);
    }

    // Last attempt: raw ARM64 kernel image; its error is the one surfaced.
    kernel_loader::load_arm64_kernel(guest_mem, kernel_start, kernel_image)
        .map_err(Error::KernelLoadFailure)
}
385
386pub struct AArch64;
387
388fn get_block_size() -> u64 {
389 let page_size = base::pagesize();
390 let ptes_per_page = page_size / 8;
393 let block_size = page_size * ptes_per_page;
394
395 block_size as u64
396}
397
398fn get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64> {
399 const MPIDR_AFF_MASK: u64 = 0xff_00ff_ffff;
400
401 Some(vcpus.get(index)?.get_mpidr().ok()? & MPIDR_AFF_MASK)
402}
403
404fn main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64 {
405 let mut main_memory_size = components.memory_size;
408 if let Some(size) = components.swiotlb {
409 if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
410 main_memory_size -= size;
411 }
412 }
413 main_memory_size
414}
415
/// Fixed guest-physical memory layout decisions computed up front from the VM
/// configuration (see `arch_memory_layout`).
pub struct ArchMemoryLayout {
    // PCI configuration access mechanism (CAM) MMIO region.
    pci_cam: AddressRange,
    // PCI memory region used as the low-MMIO allocation range.
    pci_mem: AddressRange,
}
420
421impl arch::LinuxArch for AArch64 {
422 type Error = Error;
423 type ArchMemoryLayout = ArchMemoryLayout;
424
425 fn arch_memory_layout(
426 components: &VmComponents,
427 ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
428 let (pci_cam_start, pci_cam_size) = match components.pci_config.cam {
429 Some(MemoryRegionConfig { start, size }) => {
430 (start, size.unwrap_or(AARCH64_PCI_CAM_SIZE_DEFAULT))
431 }
432 None => (AARCH64_PCI_CAM_BASE_DEFAULT, AARCH64_PCI_CAM_SIZE_DEFAULT),
433 };
434 if pci_cam_size != AARCH64_PCI_CAM_SIZE_DEFAULT {
436 return Err(Error::ConfigurePciCam(format!(
437 "PCI CAM size must be {AARCH64_PCI_CAM_SIZE_DEFAULT:#x}, got {pci_cam_size:#x}"
438 )));
439 }
440 let pci_cam = AddressRange::from_start_and_size(pci_cam_start, pci_cam_size).ok_or(
441 Error::ConfigurePciCam("PCI CAM region overflowed".to_string()),
442 )?;
443 if pci_cam.end >= AARCH64_PHYS_MEM_START {
444 return Err(Error::ConfigurePciCam(format!(
445 "PCI CAM ({pci_cam:?}) must be before start of RAM ({AARCH64_PHYS_MEM_START:#x})"
446 )));
447 }
448
449 let pci_mem = match components.pci_config.mem {
450 Some(MemoryRegionConfig { start, size }) => AddressRange::from_start_and_size(
451 start,
452 size.unwrap_or(AARCH64_PCI_MEM_SIZE_DEFAULT),
453 )
454 .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
455 None => AddressRange::from_start_and_size(
456 AARCH64_PCI_MEM_BASE_DEFAULT,
457 AARCH64_PCI_MEM_SIZE_DEFAULT,
458 )
459 .unwrap(),
460 };
461
462 Ok(ArchMemoryLayout { pci_cam, pci_mem })
463 }
464
465 fn guest_memory_layout(
468 components: &VmComponents,
469 _arch_memory_layout: &Self::ArchMemoryLayout,
470 hypervisor: &impl Hypervisor,
471 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
472 let main_memory_size = main_memory_size(components, hypervisor);
473
474 let mut memory_regions = vec![(
475 GuestAddress(AARCH64_PHYS_MEM_START),
476 main_memory_size,
477 MemoryRegionOptions::new().align(get_block_size()),
478 )];
479
480 if components.hv_cfg.protection_type.runs_firmware() {
482 memory_regions.push((
483 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
484 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
485 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
486 ));
487 }
488
489 if let Some(size) = components.swiotlb {
490 if let Some(addr) = get_swiotlb_addr(components.memory_size, size, hypervisor) {
491 memory_regions.push((
492 addr,
493 size,
494 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
495 ));
496 }
497 }
498
499 Ok(memory_regions)
500 }
501
502 fn get_system_allocator_config<V: Vm>(
503 vm: &V,
504 arch_memory_layout: &Self::ArchMemoryLayout,
505 ) -> SystemAllocatorConfig {
506 let guest_phys_end = 1u64 << vm.get_guest_phys_addr_bits();
507 let plat_mmio_base = vm.get_memory().end_addr().offset();
509 let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
510 let high_mmio_base = plat_mmio_base + plat_mmio_size;
512 let high_mmio_size = guest_phys_end
513 .checked_sub(high_mmio_base)
514 .unwrap_or_else(|| {
515 panic!("guest_phys_end {guest_phys_end:#x} < high_mmio_base {high_mmio_base:#x}",);
516 });
517 SystemAllocatorConfig {
518 io: None,
519 low_mmio: arch_memory_layout.pci_mem,
520 high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
521 .expect("invalid high mmio region"),
522 platform_mmio: Some(
523 AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
524 .expect("invalid platform mmio region"),
525 ),
526 first_irq: AARCH64_IRQ_BASE,
527 }
528 }
529
    /// Builds a runnable AArch64 VM: loads firmware and the guest payload,
    /// creates and initializes the vCPUs, wires up the buses and devices,
    /// generates the FDT, and performs final arch-specific VM initialization.
    ///
    /// The ordering of steps below matters (e.g. vCPUs must exist before the
    /// IRQ chip is finalized, and the FDT is written only after all devices
    /// and allocations are known); do not reorder casually.
    fn build_vm<V, Vcpu>(
        mut components: VmComponents,
        arch_memory_layout: &Self::ArchMemoryLayout,
        _vm_evt_wrtube: &SendTube,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
        mut vm: V,
        ramoops_region: Option<arch::pstore::RamoopsRegion>,
        devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: &mut dyn IrqChipAArch64,
        vcpu_ids: &mut Vec<usize>,
        dump_device_tree_blob: Option<PathBuf>,
        _debugcon_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
        _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
        device_tree_overlays: Vec<DtbOverlay>,
        fdt_position: Option<FdtPosition>,
        no_pmu: bool,
    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
    where
        V: VmAArch64,
        Vcpu: VcpuAArch64,
    {
        let has_bios = matches!(components.vm_image, VmImage::Bios(_));
        let mem = vm.get_memory().clone();

        let main_memory_size = main_memory_size(&components, vm.get_hypervisor());

        // Protected VM firmware: either a caller-supplied pvmfw image is
        // copied into guest memory, or the hypervisor loads its own.
        if components.hv_cfg.protection_type.needs_firmware_loaded() {
            arch::load_image(
                &mem,
                &mut components
                    .pvm_fw
                    .expect("pvmfw must be available if ProtectionType loads it"),
                GuestAddress(AARCH64_PROTECTED_VM_FW_START),
                AARCH64_PROTECTED_VM_FW_MAX_SIZE,
            )
            .map_err(Error::CustomPvmFwLoadFailure)?;
        } else if components.hv_cfg.protection_type.runs_firmware() {
            vm.load_protected_vm_firmware(
                GuestAddress(AARCH64_PROTECTED_VM_FW_START),
                AARCH64_PROTECTED_VM_FW_MAX_SIZE,
            )
            .map_err(Error::PvmFwLoadFailure)?;
        }

        // Default FDT placement: at the start of RAM for a BIOS payload,
        // otherwise at the end of RAM.
        let fdt_position = fdt_position.unwrap_or(if has_bios {
            FdtPosition::Start
        } else {
            FdtPosition::End
        });
        // The payload goes after the FDT when the FDT is at the start of RAM,
        // otherwise at the very start of RAM.
        let payload_address = match fdt_position {
            FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_MAX_SIZE),
            FdtPosition::End | FdtPosition::AfterPayload => GuestAddress(AARCH64_PHYS_MEM_START),
        };

        // Load the payload (BIOS or kernel) and, for kernels, an optional
        // initrd placed after the kernel at AARCH64_INITRD_ALIGN alignment.
        let mut initrd: Option<(GuestAddress, u32)> = None;
        let (payload, payload_end_address) = match components.vm_image {
            VmImage::Bios(ref mut bios) => {
                let image_size = arch::load_image(&mem, bios, payload_address, u64::MAX)
                    .map_err(Error::BiosLoadFailure)?;
                (
                    PayloadType::Bios {
                        entry: payload_address,
                        image_size: image_size as u64,
                    },
                    // Inclusive address of the last payload byte.
                    payload_address
                        .checked_add(image_size as u64)
                        .and_then(|end| end.checked_sub(1))
                        .unwrap(),
                )
            }
            VmImage::Kernel(ref mut kernel_image) => {
                let loaded_kernel = load_kernel(&mem, payload_address, kernel_image)?;
                let kernel_end = loaded_kernel.address_range.end;
                let mut payload_end = GuestAddress(kernel_end);
                initrd = match components.initrd_image {
                    Some(initrd_file) => {
                        let mut initrd_file = initrd_file;
                        // Align the first byte after the kernel up to the
                        // initrd alignment boundary.
                        let initrd_addr = (kernel_end + 1 + (AARCH64_INITRD_ALIGN - 1))
                            & !(AARCH64_INITRD_ALIGN - 1);
                        let initrd_max_size =
                            main_memory_size.saturating_sub(initrd_addr - AARCH64_PHYS_MEM_START);
                        let initrd_addr = GuestAddress(initrd_addr);
                        let initrd_size =
                            arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
                                .map_err(Error::InitrdLoadFailure)?;
                        payload_end = initrd_addr
                            .checked_add(initrd_size as u64)
                            .and_then(|end| end.checked_sub(1))
                            .unwrap();
                        Some((initrd_addr, initrd_size))
                    }
                    None => None,
                };
                (PayloadType::Kernel(loaded_kernel), payload_end)
            }
        };

        let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + main_memory_size);

        // Resolve the concrete FDT address now that the payload extent is known.
        let fdt_address = match fdt_position {
            FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START),
            FdtPosition::End => {
                let addr = memory_end
                    .checked_sub(AARCH64_FDT_MAX_SIZE)
                    .expect("Not enough memory for FDT")
                    .align_down(AARCH64_FDT_ALIGN);
                assert!(addr > payload_end_address, "Not enough memory for FDT");
                addr
            }
            FdtPosition::AfterPayload => payload_end_address
                .checked_add(1)
                .and_then(|addr| addr.align(AARCH64_FDT_ALIGN))
                .expect("Not enough memory for FDT"),
        };

        // Create the vCPUs. When the hypervisor initializes the boot context
        // itself, the per-vCPU init state is left at its default.
        let mut use_pmu = vm.check_capability(VmCap::ArmPmuV3);
        use_pmu &= !no_pmu;
        let vcpu_count = components.vcpu_count;
        let mut has_pvtime = true;
        let mut vcpus = Vec::with_capacity(vcpu_count);
        let mut vcpu_init = Vec::with_capacity(vcpu_count);
        for vcpu_id in 0..vcpu_count {
            let vcpu: Vcpu = *vm
                .create_vcpu(vcpu_id)
                .map_err(Error::CreateVcpu)?
                .downcast::<Vcpu>()
                .map_err(|_| Error::DowncastVcpu)?;
            let per_vcpu_init = if vm
                .get_hypervisor()
                .check_capability(HypervisorCap::HypervisorInitializedBootContext)
            {
                Default::default()
            } else {
                Self::vcpu_init(
                    vcpu_id,
                    &payload,
                    fdt_address,
                    components.hv_cfg.protection_type,
                    components.boot_cpu,
                )
            };
            // pvtime is used only when every vCPU supports it.
            has_pvtime &= vcpu.has_pvtime_support();
            vcpus.push(vcpu);
            vcpu_ids.push(vcpu_id);
            vcpu_init.push(per_vcpu_init);
        }

        // SVE is enabled only in "auto" mode and only when the VM supports it.
        let enable_sve = if components.sve_config.auto {
            vm.check_capability(VmCap::Sve)
        } else {
            false
        };

        for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
            let features = &Self::vcpu_features(vcpu_id, use_pmu, components.boot_cpu, enable_sve);
            vcpu.init(features).map_err(Error::VcpuInit)?;
        }

        irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;

        // Map the shared pvtime (stolen time) memory region into the guest.
        if has_pvtime {
            let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
                .build()
                .map_err(Error::BuildPvtimeError)?;
            vm.add_memory_region(
                GuestAddress(AARCH64_PVTIME_IPA_START),
                Box::new(pvtime_mem),
                false,
                false,
                MemCacheType::CacheCoherent,
            )
            .map_err(Error::MapPvtimeError)?;
        }

        // Program the PMU and each vCPU's pvtime record. A failed init_pmu
        // simply disables PMU reporting in the FDT rather than failing.
        for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
            use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
            if has_pvtime {
                vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
                    .map_err(Error::InitPvtimeError)?;
            }
        }

        let mmio_bus = Arc::new(devices::Bus::new(BusType::Mmio));

        let hypercall_bus = Arc::new(devices::Bus::new(BusType::Hypercall));

        let io_bus = Arc::new(devices::Bus::new(BusType::Io));

        let (suspend_tube_send, suspend_tube_recv) =
            Tube::directional_pair().map_err(Error::CreateTube)?;
        let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));

        // Split the devices into PCI devices and everything else, then build
        // the PCI root from the former.
        let (pci_devices, others): (Vec<_>, Vec<_>) = devs
            .into_iter()
            .partition(|(dev, _)| dev.as_pci_device().is_some());

        let pci_devices = pci_devices
            .into_iter()
            .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
            .collect();
        let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
            arch::generate_pci_root(
                pci_devices,
                irq_chip.as_irq_chip_mut(),
                mmio_bus.clone(),
                GuestAddress(arch_memory_layout.pci_cam.start),
                8,
                io_bus.clone(),
                system_allocator,
                &mut vm,
                (devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
                None,
                #[cfg(feature = "swap")]
                swap_controller,
            )
            .map_err(Error::CreatePciRoot)?;

        let pci_root = Arc::new(Mutex::new(pci));
        let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
        // Of the remaining devices, register the platform devices.
        let (platform_devices, _others): (Vec<_>, Vec<_>) = others
            .into_iter()
            .partition(|(dev, _)| dev.as_platform_device().is_some());

        let platform_devices = platform_devices
            .into_iter()
            .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
            .collect();
        let mut dev_pm = components.vfio_platform_pm.then(DevicePowerManager::new);
        let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
            arch::sys::linux::generate_platform_bus(
                platform_devices,
                irq_chip.as_irq_chip_mut(),
                &mmio_bus,
                system_allocator,
                &mut vm,
                #[cfg(feature = "swap")]
                swap_controller,
                &mut dev_pm,
                components.hv_cfg.protection_type,
            )
            .map_err(Error::CreatePlatformBus)?;
        pid_debug_label_map.append(&mut platform_pid_debug_label_map);

        // SMCCC TRNG: register the hypercall ranges and enable them in the VM.
        if components.smccc_trng {
            let arced_trng = Arc::new(SmcccTrng::new());
            for fid_range in [SmcccTrng::HVC32_FID_RANGE, SmcccTrng::HVC64_FID_RANGE] {
                let base = fid_range.start.into();
                let count = fid_range.len();
                hypercall_bus
                    .insert_sync(arced_trng.clone(), base, count.try_into().unwrap())
                    .map_err(|e| Error::RegisterHypercalls(base, count, e))?;
                vm.enable_hypercalls(base, count)
                    .map_err(|e| Error::EnableHypercalls(base, count, e))?;
            }
        }

        // Device power manager exposed to the guest via an HVC hypercall.
        if let Some(config) = components.dev_pm {
            let dev_pm = dev_pm.ok_or(Error::MissingDevicePowerManager)?;
            match config {
                DevicePowerManagerConfig::PkvmHvc => {
                    let hvc_pm_dev = HvcDevicePowerManager::new(dev_pm);
                    let hvc_id = HvcDevicePowerManager::HVC_FUNCTION_ID.into();
                    hypercall_bus
                        .insert_sync(Arc::new(hvc_pm_dev), hvc_id, 1)
                        .map_err(|e| Error::RegisterHypercalls(hvc_id, 1, e))?;
                    vm.enable_hypercall(hvc_id)
                        .map_err(|e| Error::EnableHypercalls(hvc_id, 1, e))?;
                }
            }
        }

        // Architecture-specific devices (RTC, watchdog, ...).
        let (vmwdt_host_tube, vmwdt_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
        Self::add_arch_devs(
            irq_chip.as_irq_chip_mut(),
            &mmio_bus,
            vcpu_count,
            _vm_evt_wrtube,
            vmwdt_control_tube,
        )?;

        // Serial devices share two edge-triggered IRQs (ports 1/3 and 2/4).
        let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
        let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
        let serial_devices = arch::add_serial_devices(
            components.hv_cfg.protection_type,
            &mmio_bus,
            (AARCH64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
            (AARCH64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
            serial_parameters,
            serial_jail,
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(Error::CreateSerialDevices)?;

        let source = IrqEventSource {
            device_id: Serial::device_id(),
            queue_id: 0,
            device_name: Serial::debug_label(),
        };
        irq_chip
            .register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
            .map_err(Error::RegisterIrqfd)?;
        irq_chip
            .register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
            .map_err(Error::RegisterIrqfd)?;

        // Expose PCI configuration space over the CAM MMIO region.
        mmio_bus
            .insert(
                pci_bus,
                arch_memory_layout.pci_cam.start,
                arch_memory_layout.pci_cam.len().unwrap(),
            )
            .map_err(Error::RegisterPci)?;

        let (vcpufreq_host_tube, vcpufreq_control_tube) =
            Tube::pair().map_err(Error::CreateTube)?;
        let vcpufreq_shared_tube = Arc::new(Mutex::new(vcpufreq_control_tube));
        // Virtual cpufreq devices: one MMIO slot per vCPU, either the v1 or
        // the larger v2 device depending on configuration.
        #[cfg(any(target_os = "android", target_os = "linux"))]
        if !components.cpu_frequencies.is_empty() {
            let mut freq_domain_vcpus: BTreeMap<u32, Vec<usize>> = BTreeMap::new();
            let mut freq_domain_perfs: BTreeMap<u32, Arc<AtomicU32>> = BTreeMap::new();
            let mut vcpu_affinities: Vec<u32> = Vec::new();
            for vcpu in 0..vcpu_count {
                // A vCPU without an explicit domain gets its own domain.
                let freq_domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
                freq_domain_vcpus.entry(freq_domain).or_default().push(vcpu);
                let vcpu_affinity = match components.vcpu_affinity.clone() {
                    Some(VcpuAffinity::Global(v)) => v,
                    Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(),
                    None => panic!("vcpu_affinity needs to be set for VirtCpufreq"),
                };
                // Only the first affinity entry is used per vCPU.
                vcpu_affinities.push(vcpu_affinity[0].try_into().unwrap());
            }
            for domain in freq_domain_vcpus.keys() {
                let domain_perf = Arc::new(AtomicU32::new(0));
                freq_domain_perfs.insert(*domain, domain_perf);
            }
            let largest_vcpu_affinity_idx = *vcpu_affinities.iter().max().unwrap() as usize;
            for (vcpu, vcpu_affinity) in vcpu_affinities.iter().enumerate() {
                let mut virtfreq_size = AARCH64_VIRTFREQ_SIZE;
                if components.virt_cpufreq_v2 {
                    let domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
                    virtfreq_size = AARCH64_VIRTFREQ_V2_SIZE;
                    let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreqV2::new(
                        *vcpu_affinity,
                        components.cpu_frequencies.get(&vcpu).unwrap().clone(),
                        components.vcpu_domain_paths.get(&vcpu).cloned(),
                        domain,
                        *components.normalized_cpu_ipc_ratios.get(&vcpu).unwrap(),
                        largest_vcpu_affinity_idx,
                        vcpufreq_shared_tube.clone(),
                        freq_domain_vcpus.get(&domain).unwrap().clone(),
                        freq_domain_perfs.get(&domain).unwrap().clone(),
                    )));
                    mmio_bus
                        .insert(
                            virt_cpufreq,
                            AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
                            virtfreq_size,
                        )
                        .map_err(Error::RegisterVirtCpufreq)?;
                } else {
                    let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new(
                        *vcpu_affinity,
                        *components.cpu_capacity.get(&vcpu).unwrap(),
                        *components
                            .cpu_frequencies
                            .get(&vcpu)
                            .unwrap()
                            .iter()
                            .max()
                            .unwrap(),
                    )));
                    mmio_bus
                        .insert(
                            virt_cpufreq,
                            AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
                            virtfreq_size,
                        )
                        .map_err(Error::RegisterVirtCpufreq)?;
                }

                if vcpu as u64 * AARCH64_VIRTFREQ_SIZE + virtfreq_size > AARCH64_VIRTFREQ_MAXSIZE {
                    panic!("Exceeded maximum number of virt cpufreq devices");
                }
            }
        }

        // Assemble the kernel command line.
        let mut cmdline = Self::get_base_linux_cmdline();
        get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
            .map_err(Error::GetSerialCmdline)?;
        for param in components.extra_kernel_params {
            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
        }

        if let Some(ramoops_region) = ramoops_region {
            arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
                .map_err(Error::Cmdline)?;
        }

        let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;

        let pci_cfg = fdt::PciConfigRegion {
            base: arch_memory_layout.pci_cam.start,
            size: arch_memory_layout.pci_cam.len().unwrap(),
        };

        // Collect the PCI ranges (low 32-bit and high 64-bit MMIO pools) for
        // the FDT; bus and CPU physical addresses are identity-mapped.
        let mut pci_ranges: Vec<fdt::PciRange> = Vec::new();

        let mut add_pci_ranges =
            |alloc: &AddressAllocator, space: PciAddressSpace, prefetchable: bool| {
                pci_ranges.extend(alloc.pools().iter().map(|range| fdt::PciRange {
                    space,
                    bus_address: range.start,
                    cpu_physical_address: range.start,
                    size: range.len().unwrap(),
                    prefetchable,
                }));
            };

        add_pci_ranges(
            system_allocator.mmio_allocator(MmioType::Low),
            PciAddressSpace::Memory,
            false, // prefetchable
        );
        add_pci_ranges(
            system_allocator.mmio_allocator(MmioType::High),
            PciAddressSpace::Memory64,
            true, // prefetchable
        );

        // Optional goldfish battery device.
        let (bat_control, bat_mmio_base_and_irq) = match bat_type {
            Some(BatteryType::Goldfish) => {
                let bat_irq = AARCH64_BAT_IRQ;

                // a dummy AML buffer. Aarch64 crosvm doesn't use ACPI.
                let mut amls = Vec::new();
                let (control_tube, mmio_base) = arch::sys::linux::add_goldfish_battery(
                    &mut amls,
                    bat_jail,
                    &mmio_bus,
                    irq_chip.as_irq_chip_mut(),
                    bat_irq,
                    system_allocator,
                    #[cfg(feature = "swap")]
                    swap_controller,
                )
                .map_err(Error::CreateBatDevices)?;
                (
                    Some(BatControl {
                        type_: BatteryType::Goldfish,
                        control_tube,
                    }),
                    Some((mmio_base, bat_irq)),
                )
            }
            None => (None, None),
        };

        let vmwdt_cfg = fdt::VmWdtConfig {
            base: AARCH64_VMWDT_ADDR,
            size: AARCH64_VMWDT_SIZE,
            clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
            timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
        };

        // Generate the device tree and write it into guest memory.
        fdt::create_fdt(
            AARCH64_FDT_MAX_SIZE as usize,
            &mem,
            pci_irqs,
            pci_cfg,
            &pci_ranges,
            dev_resources,
            vcpu_count as u32,
            &|n| get_vcpu_mpidr_aff(&vcpus, n),
            components.cpu_clusters,
            components.cpu_capacity,
            components.cpu_frequencies,
            fdt_address,
            cmdline
                .as_str_with_max_len(AARCH64_CMDLINE_MAX_SIZE - 1)
                .map_err(Error::Cmdline)?,
            payload.address_range(),
            initrd,
            components.android_fstab,
            irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
            irq_chip.has_vgic_its(),
            use_pmu,
            psci_version,
            components.swiotlb.map(|size| {
                (
                    get_swiotlb_addr(components.memory_size, size, vm.get_hypervisor()),
                    size,
                )
            }),
            bat_mmio_base_and_irq,
            vmwdt_cfg,
            dump_device_tree_blob,
            &|writer, phandles| vm.create_fdt(writer, phandles),
            components.dynamic_power_coefficient,
            device_tree_overlays,
            &serial_devices,
            components.virt_cpufreq_v2,
        )
        .map_err(Error::CreateFdt)?;

        // Final arch-specific VM initialization with the entry point and FDT.
        vm.init_arch(
            payload.entry(),
            fdt_address,
            AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
        )
        .map_err(Error::InitVmError)?;

        let vm_request_tubes = vec![vmwdt_host_tube, vcpufreq_host_tube];

        Ok(RunnableLinuxVm {
            vm,
            vcpu_count,
            vcpus: Some(vcpus),
            vcpu_init,
            vcpu_affinity: components.vcpu_affinity,
            no_smt: components.no_smt,
            irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
            io_bus,
            mmio_bus,
            hypercall_bus,
            pid_debug_label_map,
            suspend_tube: (suspend_tube_send, suspend_tube_recv),
            rt_cpus: components.rt_cpus,
            delay_rt: components.delay_rt,
            bat_control,
            pm: None,
            resume_notify_devices: Vec::new(),
            root_config: pci_root,
            platform_devices,
            hotplug_bus: BTreeMap::new(),
            devices_thread: None,
            vm_request_tubes,
        })
    }
1086
1087 fn configure_vcpu<V: Vm>(
1088 _vm: &V,
1089 _hypervisor: &dyn Hypervisor,
1090 _irq_chip: &mut dyn IrqChipAArch64,
1091 vcpu: &mut dyn VcpuAArch64,
1092 vcpu_init: VcpuInitAArch64,
1093 _vcpu_id: usize,
1094 _num_cpus: usize,
1095 _cpu_config: Option<CpuConfigAArch64>,
1096 ) -> std::result::Result<(), Self::Error> {
1097 for (reg, value) in vcpu_init.regs.iter() {
1098 vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
1099 }
1100 Ok(())
1101 }
1102
    /// Hotplug-style PCI device registration is not implemented for AArch64;
    /// this always fails with `Error::Unsupported`.
    fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
        _linux: &mut RunnableLinuxVm<V, Vcpu>,
        _device: Box<dyn PciDevice>,
        _minijail: Option<Minijail>,
        _resources: &mut SystemAllocator,
        _tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
    ) -> std::result::Result<PciAddress, Self::Error> {
        Err(Error::Unsupported)
    }
1114
    /// Returns the maximum frequency (in kHz) of each online host logical
    /// core, keyed by logical core id. Offline cores are skipped.
    fn get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
        Self::collect_for_online_cpus(base::logical_core_max_freq_khz)
            .map_err(Error::CpuFrequencies)
    }
1120
    /// Returns the supported frequencies (in kHz) of each online host logical
    /// core, keyed by logical core id. Offline cores are skipped.
    fn get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>
    {
        Self::collect_for_online_cpus(base::logical_core_frequencies_khz)
            .map_err(Error::CpuFrequencies)
    }
1127
    /// Returns the capacity value reported by the host for each online
    /// logical core, keyed by logical core id. Offline cores are skipped.
    fn get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
        Self::collect_for_online_cpus(base::logical_core_capacity).map_err(Error::CpuTopology)
    }
1133
1134 fn get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error> {
1136 let cluster_ids = Self::collect_for_online_cpus(base::logical_core_cluster_id)
1137 .map_err(Error::CpuTopology)?;
1138 get_host_cpu_clusters_for_cluster_ids(cluster_ids)
1139 }
1140}
1141
1142fn get_host_cpu_clusters_for_cluster_ids(
1143 cluster_ids: BTreeMap<usize, u32>,
1144) -> std::result::Result<Vec<CpuSet>, Error> {
1145 let mut unique_clusters: Vec<CpuSet> = cluster_ids
1146 .iter()
1147 .map(|(_, &vcpu_cluster_id)| {
1148 cluster_ids
1149 .iter()
1150 .filter(|(_, &other_vcpu_cluster_id)| vcpu_cluster_id == other_vcpu_cluster_id)
1151 .map(|(cpu_id, _)| cpu_id)
1152 .copied()
1153 .collect()
1154 })
1155 .collect();
1156 unique_clusters.sort_unstable();
1157 unique_clusters.dedup();
1158 Ok(unique_clusters)
1159}
1160
1161#[cfg(feature = "gdb")]
1162impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
1163 type Error = Error;
1164
1165 fn read_memory(
1166 _vcpu: &T,
1167 guest_mem: &GuestMemory,
1168 vaddr: GuestAddress,
1169 len: usize,
1170 ) -> Result<Vec<u8>> {
1171 let mut buf = vec![0; len];
1172
1173 guest_mem
1174 .read_exact_at_addr(&mut buf, vaddr)
1175 .map_err(Error::ReadGuestMemory)?;
1176
1177 Ok(buf)
1178 }
1179
1180 fn write_memory(
1181 _vcpu: &T,
1182 guest_mem: &GuestMemory,
1183 vaddr: GuestAddress,
1184 buf: &[u8],
1185 ) -> Result<()> {
1186 guest_mem
1187 .write_all_at_addr(buf, vaddr)
1188 .map_err(Error::WriteGuestMemory)
1189 }
1190
1191 fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
1192 let mut regs: <GdbArch as Arch>::Registers = Default::default();
1193 assert!(
1194 regs.x.len() == 31,
1195 "unexpected number of Xn general purpose registers"
1196 );
1197 for (i, reg) in regs.x.iter_mut().enumerate() {
1198 let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1199 *reg = vcpu
1200 .get_one_reg(VcpuRegAArch64::X(n))
1201 .map_err(Error::ReadReg)?;
1202 }
1203 regs.sp = vcpu
1204 .get_one_reg(VcpuRegAArch64::Sp)
1205 .map_err(Error::ReadReg)?;
1206 regs.pc = vcpu
1207 .get_one_reg(VcpuRegAArch64::Pc)
1208 .map_err(Error::ReadReg)?;
1209 regs.cpsr = vcpu
1211 .get_one_reg(VcpuRegAArch64::Pstate)
1212 .map_err(Error::ReadReg)? as u32;
1213 for (i, reg) in regs.v.iter_mut().enumerate() {
1214 let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1215 *reg = vcpu.get_vector_reg(n).map_err(Error::ReadReg)?;
1216 }
1217 regs.fpcr = vcpu
1218 .get_one_reg(VcpuRegAArch64::System(aarch64_sys_reg::FPCR))
1219 .map_err(Error::ReadReg)? as u32;
1220 regs.fpsr = vcpu
1221 .get_one_reg(VcpuRegAArch64::System(aarch64_sys_reg::FPSR))
1222 .map_err(Error::ReadReg)? as u32;
1223
1224 Ok(regs)
1225 }
1226
1227 fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
1228 assert!(
1229 regs.x.len() == 31,
1230 "unexpected number of Xn general purpose registers"
1231 );
1232 for (i, reg) in regs.x.iter().enumerate() {
1233 let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1234 vcpu.set_one_reg(VcpuRegAArch64::X(n), *reg)
1235 .map_err(Error::WriteReg)?;
1236 }
1237 vcpu.set_one_reg(VcpuRegAArch64::Sp, regs.sp)
1238 .map_err(Error::WriteReg)?;
1239 vcpu.set_one_reg(VcpuRegAArch64::Pc, regs.pc)
1240 .map_err(Error::WriteReg)?;
1241 let pstate = vcpu
1243 .get_one_reg(VcpuRegAArch64::Pstate)
1244 .map_err(Error::ReadReg)?;
1245 let pstate = (pstate & 0xffff_ffff_0000_0000) | (regs.cpsr as u64);
1246 vcpu.set_one_reg(VcpuRegAArch64::Pstate, pstate)
1247 .map_err(Error::WriteReg)?;
1248 for (i, reg) in regs.v.iter().enumerate() {
1249 let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1250 vcpu.set_vector_reg(n, *reg).map_err(Error::WriteReg)?;
1251 }
1252 vcpu.set_one_reg(
1253 VcpuRegAArch64::System(aarch64_sys_reg::FPCR),
1254 u64::from(regs.fpcr),
1255 )
1256 .map_err(Error::WriteReg)?;
1257 vcpu.set_one_reg(
1258 VcpuRegAArch64::System(aarch64_sys_reg::FPSR),
1259 u64::from(regs.fpsr),
1260 )
1261 .map_err(Error::WriteReg)?;
1262
1263 Ok(())
1264 }
1265
1266 fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
1267 let result = match reg_id {
1268 AArch64RegId::X(n) => vcpu
1269 .get_one_reg(VcpuRegAArch64::X(n))
1270 .map(|v| v.to_ne_bytes().to_vec()),
1271 AArch64RegId::Sp => vcpu
1272 .get_one_reg(VcpuRegAArch64::Sp)
1273 .map(|v| v.to_ne_bytes().to_vec()),
1274 AArch64RegId::Pc => vcpu
1275 .get_one_reg(VcpuRegAArch64::Pc)
1276 .map(|v| v.to_ne_bytes().to_vec()),
1277 AArch64RegId::Pstate => vcpu
1278 .get_one_reg(VcpuRegAArch64::Pstate)
1279 .map(|v| (v as u32).to_ne_bytes().to_vec()),
1280 AArch64RegId::V(n) => vcpu.get_vector_reg(n).map(|v| v.to_ne_bytes().to_vec()),
1281 AArch64RegId::System(op) => vcpu
1282 .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)))
1283 .map(|v| v.to_ne_bytes().to_vec()),
1284 _ => {
1285 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1286 Err(base::Error::new(libc::EINVAL))
1287 }
1288 };
1289
1290 match result {
1291 Ok(bytes) => Ok(bytes),
1292 Err(e) if e.errno() == libc::ENOENT => Ok(Vec::new()),
1294 Err(e) => Err(Error::ReadReg(e)),
1295 }
1296 }
1297
1298 fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
1299 fn try_into_u32(data: &[u8]) -> Result<u32> {
1300 let s = data
1301 .get(..4)
1302 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1303 let a = s
1304 .try_into()
1305 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1306 Ok(u32::from_ne_bytes(a))
1307 }
1308
1309 fn try_into_u64(data: &[u8]) -> Result<u64> {
1310 let s = data
1311 .get(..8)
1312 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1313 let a = s
1314 .try_into()
1315 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1316 Ok(u64::from_ne_bytes(a))
1317 }
1318
1319 fn try_into_u128(data: &[u8]) -> Result<u128> {
1320 let s = data
1321 .get(..16)
1322 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1323 let a = s
1324 .try_into()
1325 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1326 Ok(u128::from_ne_bytes(a))
1327 }
1328
1329 match reg_id {
1330 AArch64RegId::X(n) => vcpu.set_one_reg(VcpuRegAArch64::X(n), try_into_u64(data)?),
1331 AArch64RegId::Sp => vcpu.set_one_reg(VcpuRegAArch64::Sp, try_into_u64(data)?),
1332 AArch64RegId::Pc => vcpu.set_one_reg(VcpuRegAArch64::Pc, try_into_u64(data)?),
1333 AArch64RegId::Pstate => {
1334 vcpu.set_one_reg(VcpuRegAArch64::Pstate, u64::from(try_into_u32(data)?))
1335 }
1336 AArch64RegId::V(n) => vcpu.set_vector_reg(n, try_into_u128(data)?),
1337 AArch64RegId::System(op) => vcpu.set_one_reg(
1338 VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)),
1339 try_into_u64(data)?,
1340 ),
1341 _ => {
1342 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1343 Err(base::Error::new(libc::EINVAL))
1344 }
1345 }
1346 .map_err(Error::WriteReg)
1347 }
1348
1349 fn enable_singlestep(vcpu: &T) -> Result<()> {
1350 const SINGLE_STEP: bool = true;
1351 vcpu.set_guest_debug(&[], SINGLE_STEP)
1352 .map_err(Error::EnableSinglestep)
1353 }
1354
1355 fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
1356 vcpu.get_max_hw_bps().map_err(Error::GetMaxHwBreakPoint)
1357 }
1358
1359 fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
1360 const SINGLE_STEP: bool = false;
1361 vcpu.set_guest_debug(breakpoints, SINGLE_STEP)
1362 .map_err(Error::SetHwBreakpoint)
1363 }
1364}
1365
impl AArch64 {
    /// Returns the base kernel command line common to all AArch64 VMs.
    ///
    /// `panic=-1` requests an immediate reboot when the guest kernel panics
    /// (per the Linux `panic=` boot parameter semantics).
    fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
        let mut cmdline = kernel_cmdline::Cmdline::new();
        cmdline.insert_str("panic=-1").unwrap();
        cmdline
    }

    /// Adds AArch64-specific platform devices to `bus`: a PL030 RTC and a
    /// virtual watchdog (vmwdt), registering their edge-triggered interrupts
    /// with `irq_chip` before placing each device on the bus.
    ///
    /// # Arguments
    ///
    /// * `irq_chip` - chip the devices' IRQ events are registered with
    /// * `bus` - MMIO bus the devices are inserted on
    /// * `vcpu_count` - number of vcpus, passed to the watchdog device
    /// * `vm_evt_wrtube` - tube the watchdog uses to send VM events
    /// * `vmwdt_request_tube` - control tube handed to the watchdog device
    fn add_arch_devs(
        irq_chip: &mut dyn IrqChip,
        bus: &Bus,
        vcpu_count: usize,
        vm_evt_wrtube: &SendTube,
        vmwdt_request_tube: Tube,
    ) -> Result<()> {
        let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
        let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
        irq_chip
            .register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
            .map_err(Error::RegisterIrqfd)?;

        bus.insert(
            Arc::new(Mutex::new(rtc)),
            AARCH64_RTC_ADDR,
            AARCH64_RTC_SIZE,
        )
        .expect("failed to add rtc device");

        let vmwdt_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
        // NOTE(review): a failed SendTube clone panics here rather than being
        // mapped to an Error variant like the event clones above.
        let vm_wdt = devices::vmwdt::Vmwdt::new(
            vcpu_count,
            vm_evt_wrtube.try_clone().unwrap(),
            vmwdt_evt.try_clone().map_err(Error::CloneEvent)?,
            vmwdt_request_tube,
        )
        .map_err(Error::CreateVmwdtDevice)?;
        irq_chip
            .register_edge_irq_event(
                AARCH64_VMWDT_IRQ,
                &vmwdt_evt,
                IrqEventSource::from_device(&vm_wdt),
            )
            .map_err(Error::RegisterIrqfd)?;

        bus.insert(
            Arc::new(Mutex::new(vm_wdt)),
            AARCH64_VMWDT_ADDR,
            AARCH64_VMWDT_SIZE,
        )
        .expect("failed to add vmwdt device");

        Ok(())
    }

    /// Returns the hypervisor vcpu features to enable for `vcpu_id`.
    ///
    /// Every vcpu gets PSCI v0.2. PMUv3 is added when `use_pmu` is set and
    /// SVE when `enable_sve` is set. Every vcpu other than the boot cpu is
    /// additionally created in the powered-off state.
    fn vcpu_features(
        vcpu_id: usize,
        use_pmu: bool,
        boot_cpu: usize,
        enable_sve: bool,
    ) -> Vec<VcpuFeature> {
        let mut features = vec![VcpuFeature::PsciV0_2];
        if use_pmu {
            features.push(VcpuFeature::PmuV3);
        }
        // Non-boot cpus start powered off.
        if vcpu_id != boot_cpu {
            features.push(VcpuFeature::PowerOff);
        }
        if enable_sve {
            features.push(VcpuFeature::Sve);
        }

        features
    }

    /// Builds the initial register state for vcpu `vcpu_id`.
    ///
    /// Every vcpu starts at EL1h with D/A/I/F masked in PSTATE. Only the boot
    /// cpu gets an entry point and boot arguments:
    /// * `Pc` - the firmware start address when protected-VM firmware must be
    ///   loaded, the payload entry otherwise, or unset when the hypervisor
    ///   runs the firmware itself
    /// * `X0` - the FDT address
    /// * `X1`/`X2` - payload entry address and size, only when firmware runs
    ///   before the payload
    fn vcpu_init(
        vcpu_id: usize,
        payload: &PayloadType,
        fdt_address: GuestAddress,
        protection_type: ProtectionType,
        boot_cpu: usize,
    ) -> VcpuInitAArch64 {
        let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();

        // PSTATE: mask debug, SError, IRQ and FIQ, and run in EL1 using the
        // EL1 stack pointer (EL1h).
        let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
        regs.insert(VcpuRegAArch64::Pstate, pstate);

        // Other cpus are spun up later; only the boot cpu needs an entry
        // point and boot arguments.
        if vcpu_id == boot_cpu {
            let entry_addr = if protection_type.needs_firmware_loaded() {
                Some(AARCH64_PROTECTED_VM_FW_START)
            } else if protection_type.runs_firmware() {
                // The hypervisor controls the entry point in this case.
                None
            } else {
                Some(payload.entry().offset())
            };

            if let Some(entry) = entry_addr {
                regs.insert(VcpuRegAArch64::Pc, entry);
            }

            // X0 carries the device tree address to the guest.
            regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());

            if protection_type.runs_firmware() {
                // Tell the firmware where the payload is and how big it is.
                regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());

                regs.insert(VcpuRegAArch64::X(2), payload.size());
            }
        }

        VcpuInitAArch64 { regs }
    }

    /// Runs `map_func` for each online host logical core and returns the
    /// results keyed by core id; offline cores are skipped.
    ///
    /// If the number of cores visited differs from the reported online-core
    /// count (possibly because a core changed state while iterating), a
    /// warning is logged and the partial map is still returned.
    fn collect_for_online_cpus<G, T>(
        map_func: G,
    ) -> std::result::Result<BTreeMap<usize, T>, base::Error>
    where
        G: Fn(usize) -> std::result::Result<T, base::Error>,
    {
        let cpu_map = Self::collect_for_each_cpu(
            base::number_of_logical_cores().expect("Failed to read number of CPUs"),
            base::is_cpu_online,
            map_func,
        )?;

        let online_cpus =
            base::number_of_online_cores().expect("Failed to read number of online CPUs");
        let actual_cpus = cpu_map.len();
        if online_cpus != actual_cpus {
            warn!("Only able to check {actual_cpus} of {online_cpus} online CPUs.");
        }
        Ok(cpu_map)
    }

    /// Runs `map_func` for each cpu id in `0..num_cpus` that `filter_func`
    /// accepts, collecting the results keyed by cpu id. The first error from
    /// either closure aborts the collection.
    fn collect_for_each_cpu<F, G, T>(
        num_cpus: usize,
        filter_func: F,
        map_func: G,
    ) -> std::result::Result<BTreeMap<usize, T>, base::Error>
    where
        F: Fn(usize) -> std::result::Result<bool, base::Error>,
        G: Fn(usize) -> std::result::Result<T, base::Error>,
    {
        let mut cpu_map = BTreeMap::new();
        for cpu_id in 0..num_cpus {
            if filter_func(cpu_id)? {
                cpu_map.insert(cpu_id, map_func(cpu_id)?);
            }
        }
        Ok(cpu_map)
    }
}
1545
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn vcpu_init_unprotected_kernel() {
        let payload = PayloadType::Kernel(LoadedKernel {
            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
            size: 0x1000,
            entry: GuestAddress(0x8080_0000),
            class: kernel_loader::ElfClass::ElfClass64,
        });
        assert_eq!(
            payload.address_range(),
            AddressRange {
                start: 0x8080_0000,
                end: 0x8080_0fff
            }
        );
        let fdt_address = GuestAddress(0x1234);
        let prot = ProtectionType::Unprotected;

        let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);

        // The boot cpu's PC points at the kernel entry.
        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8080_0000));

        // X0 carries the FDT address.
        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
    }

    #[test]
    fn vcpu_init_unprotected_bios() {
        let payload = PayloadType::Bios {
            entry: GuestAddress(0x8020_0000),
            image_size: 0x1000,
        };
        assert_eq!(
            payload.address_range(),
            AddressRange {
                start: 0x8020_0000,
                end: 0x8020_0fff
            }
        );
        let fdt_address = GuestAddress(0x1234);
        let prot = ProtectionType::Unprotected;

        let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);

        // The boot cpu's PC points at the BIOS entry.
        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8020_0000));

        // X0 carries the FDT address.
        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
    }

    #[test]
    fn vcpu_init_protected_kernel() {
        let payload = PayloadType::Kernel(LoadedKernel {
            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
            size: 0x1000,
            entry: GuestAddress(0x8080_0000),
            class: kernel_loader::ElfClass::ElfClass64,
        });
        assert_eq!(
            payload.address_range(),
            AddressRange {
                start: 0x8080_0000,
                end: 0x8080_0fff
            }
        );
        let fdt_address = GuestAddress(0x1234);
        let prot = ProtectionType::Protected;

        let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);

        // No PC: the hypervisor-run firmware determines the entry point.
        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), None);

        // X0 carries the FDT address.
        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));

        // X1/X2 tell the firmware where the payload is and how big it is.
        assert_eq!(
            vcpu_init.regs.get(&VcpuRegAArch64::X(1)),
            Some(&0x8080_0000)
        );

        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(2)), Some(&0x1000));
    }

    #[test]
    fn collect_for_each_cpu_simple() {
        let num_cpus = 2;
        let filter_func = |_cpu_id: usize| -> std::result::Result<bool, base::Error> { Ok(true) };
        let map_func = |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id) };
        let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func).unwrap();

        assert_eq!(result.len(), num_cpus);
        assert_eq!(result.get(&0), Some(0).as_ref());
        assert_eq!(result.get(&1), Some(1).as_ref());
    }

    #[test]
    fn collect_for_each_cpu_filter() {
        let num_cpus = 2;
        // Only even cpu ids pass the filter.
        let filter_func =
            |cpu_id: usize| -> std::result::Result<bool, base::Error> { Ok(cpu_id % 2 == 0) };
        let map_func = |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id) };
        let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(result.get(&0), Some(0).as_ref());
    }

    #[test]
    fn collect_for_each_cpu_map() {
        let num_cpus = 4;
        let filter_func = |_| -> std::result::Result<bool, base::Error> { Ok(true) };
        let map_func =
            |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id * 2) };
        let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func).unwrap();

        assert_eq!(result.len(), num_cpus);
        assert_eq!(result.get(&0), Some(0).as_ref());
        assert_eq!(result.get(&1), Some(2).as_ref());
    }

    #[test]
    fn collect_for_each_cpu_filter_error() {
        let num_cpus = 1;
        let filter_func =
            |_| -> std::result::Result<bool, base::Error> { Err(base::Error::new(libc::EINVAL)) };
        let map_func = |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id) };
        let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().errno(), libc::EINVAL);
    }

    #[test]
    fn collect_for_each_cpu_map_error() {
        let num_cpus = 1;
        let filter_func = |_| -> std::result::Result<bool, base::Error> { Ok(true) };
        let map_func =
            |_| -> std::result::Result<usize, base::Error> { Err(base::Error::new(libc::EINVAL)) };
        let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().errno(), libc::EINVAL);
    }

    // Renamed from test_get_host_cpu_clusters_for_cluster_ids_unique for
    // naming consistency with the sibling tests (no `test_` prefix).
    #[test]
    fn get_host_cpu_clusters_for_cluster_ids_unique() {
        let cluster_ids = BTreeMap::from([(0, 0), (1, 1), (2, 2)]);
        let result = get_host_cpu_clusters_for_cluster_ids(cluster_ids)
            .expect("shouldn't fail to get clusters");
        assert_eq!(
            vec![CpuSet::new([0]), CpuSet::new([1]), CpuSet::new([2])],
            result
        );
    }

    #[test]
    fn get_host_cpu_clusters_for_cluster_ids_clustered() {
        let cluster_ids = BTreeMap::from([(0, 0), (1, 1), (2, 1)]);
        let result = get_host_cpu_clusters_for_cluster_ids(cluster_ids)
            .expect("shouldn't fail to get clusters");
        assert_eq!(vec![CpuSet::new([0]), CpuSet::new([1, 2])], result);
    }

    #[test]
    fn get_host_cpu_clusters_for_cluster_ids_skip_cpu() {
        // CPU 1 is absent from the map and must not appear in any cluster.
        let cluster_ids = BTreeMap::from([(0, 0), (2, 1), (3, 1)]);
        let result = get_host_cpu_clusters_for_cluster_ids(cluster_ids)
            .expect("shouldn't fail to get clusters");
        assert_eq!(vec![CpuSet::new([0]), CpuSet::new([2, 3])], result);
    }
}