aarch64/lib.rs

// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! ARM 64-bit architecture support.

#![cfg(target_arch = "aarch64")]
8
9use std::collections::BTreeMap;
10use std::fs::File;
11use std::io;
12use std::path::PathBuf;
13use std::sync::atomic::AtomicU32;
14use std::sync::mpsc;
15use std::sync::Arc;
16
17#[cfg(feature = "gdb")]
18use aarch64_sys_reg::AArch64SysRegId;
19use arch::get_serial_cmdline;
20use arch::CpuSet;
21use arch::DtbOverlay;
22use arch::FdtPosition;
23use arch::GetSerialCmdlineError;
24use arch::MemoryRegionConfig;
25use arch::RunnableLinuxVm;
26use arch::VcpuAffinity;
27use arch::VmComponents;
28use arch::VmImage;
29use base::MemoryMappingBuilder;
30use base::SendTube;
31use base::Tube;
32use devices::serial_device::SerialHardware;
33use devices::serial_device::SerialParameters;
34use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
35use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
36use devices::Bus;
37use devices::BusDeviceObj;
38use devices::BusError;
39use devices::BusType;
40use devices::IrqChip;
41use devices::IrqChipAArch64;
42use devices::IrqEventSource;
43use devices::PciAddress;
44use devices::PciConfigMmio;
45use devices::PciDevice;
46use devices::PciRootCommand;
47use devices::Serial;
48#[cfg(any(target_os = "android", target_os = "linux"))]
49use devices::VirtCpufreq;
50#[cfg(any(target_os = "android", target_os = "linux"))]
51use devices::VirtCpufreqV2;
52use fdt::PciAddressSpace;
53#[cfg(feature = "gdb")]
54use gdbstub::arch::Arch;
55#[cfg(feature = "gdb")]
56use gdbstub_arch::aarch64::reg::id::AArch64RegId;
57#[cfg(feature = "gdb")]
58use gdbstub_arch::aarch64::AArch64 as GdbArch;
59use hypervisor::CpuConfigAArch64;
60use hypervisor::DeviceKind;
61use hypervisor::Hypervisor;
62use hypervisor::HypervisorCap;
63use hypervisor::MemCacheType;
64use hypervisor::ProtectionType;
65use hypervisor::VcpuAArch64;
66use hypervisor::VcpuFeature;
67use hypervisor::VcpuInitAArch64;
68use hypervisor::VcpuRegAArch64;
69use hypervisor::Vm;
70use hypervisor::VmAArch64;
71use hypervisor::VmCap;
72#[cfg(windows)]
73use jail::FakeMinijailStub as Minijail;
74use kernel_loader::LoadedKernel;
75#[cfg(any(target_os = "android", target_os = "linux"))]
76use minijail::Minijail;
77use remain::sorted;
78use resources::address_allocator::AddressAllocator;
79use resources::AddressRange;
80use resources::MmioType;
81use resources::SystemAllocator;
82use resources::SystemAllocatorConfig;
83use sync::Condvar;
84use sync::Mutex;
85use thiserror::Error;
86use vm_control::BatControl;
87use vm_control::BatteryType;
88use vm_memory::GuestAddress;
89use vm_memory::GuestMemory;
90use vm_memory::GuestMemoryError;
91use vm_memory::MemoryRegionOptions;
92use vm_memory::MemoryRegionPurpose;
93
94mod fdt;
95
96const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
97const AARCH64_FDT_ALIGN: u64 = 0x200000;
98const AARCH64_INITRD_ALIGN: u64 = 0x1000000;
99
100// Maximum Linux arm64 kernel command line size (arch/arm64/include/uapi/asm/setup.h).
101const AARCH64_CMDLINE_MAX_SIZE: usize = 2048;
102
103// These constants indicate the address space used by the ARM vGIC.
104const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
105const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;
106
107// This indicates the start of DRAM inside the physical address space.
108const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
109const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x800000;
110
111const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
112const AARCH64_PROTECTED_VM_FW_START: u64 =
113    AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;
114
115const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
116const AARCH64_PVTIME_IPA_START: u64 = 0x1ff0000;
117const AARCH64_PVTIME_SIZE: u64 = 64;
118
119// These constants indicate the placement of the GIC registers in the physical
120// address space.
121const AARCH64_GIC_DIST_BASE: u64 = 0x40000000 - AARCH64_GIC_DIST_SIZE;
122const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
123const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;
124const AARCH64_GIC_ITS_BASE: u64 = 0x40000000;
125const AARCH64_GIC_ITS_SIZE: u64 = 0x20000;
126
127// PSR (Processor State Register) bits
128const PSR_MODE_EL1H: u64 = 0x00000005;
129const PSR_F_BIT: u64 = 0x00000040;
130const PSR_I_BIT: u64 = 0x00000080;
131const PSR_A_BIT: u64 = 0x00000100;
132const PSR_D_BIT: u64 = 0x00000200;
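// Note (derived from the constants above): `vcpu_init` below combines these as
// PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H == 0x3c5,
// i.e. EL1h with the D, A, I and F exceptions all masked.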
133
// This was the speed kvmtool used; it is not clear whether it matters.
const AARCH64_SERIAL_SPEED: u32 = 1843200;
// The serial device gets the first interrupt line,
// which is mapped to the first SPI interrupt (physical 32).
const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
const AARCH64_SERIAL_2_4_IRQ: u32 = 2;
140
141// Place the RTC device at page 2
142const AARCH64_RTC_ADDR: u64 = 0x2000;
143// The RTC device gets one 4k page
144const AARCH64_RTC_SIZE: u64 = 0x1000;
145// The RTC device gets the second interrupt line
146const AARCH64_RTC_IRQ: u32 = 1;
147
148// The Goldfish battery device gets the 3rd interrupt line
149const AARCH64_BAT_IRQ: u32 = 3;
150
151// Place the virtual watchdog device at page 3
152const AARCH64_VMWDT_ADDR: u64 = 0x3000;
153// The virtual watchdog device gets one 4k page
154const AARCH64_VMWDT_SIZE: u64 = 0x1000;
155
156// Default PCI MMIO configuration region base address.
157const AARCH64_PCI_CAM_BASE_DEFAULT: u64 = 0x10000;
158// Default PCI MMIO configuration region size.
159const AARCH64_PCI_CAM_SIZE_DEFAULT: u64 = 0x1000000;
160// Default PCI mem base address.
161const AARCH64_PCI_MEM_BASE_DEFAULT: u64 = 0x2000000;
162// Default PCI mem size.
163const AARCH64_PCI_MEM_SIZE_DEFAULT: u64 = 0x2000000;
164// Virtio devices start at SPI interrupt number 4
165const AARCH64_IRQ_BASE: u32 = 4;
166
167// Virtual CPU Frequency Device.
168const AARCH64_VIRTFREQ_BASE: u64 = 0x1040000;
169const AARCH64_VIRTFREQ_SIZE: u64 = 0x8;
170const AARCH64_VIRTFREQ_MAXSIZE: u64 = 0x10000;
171const AARCH64_VIRTFREQ_V2_SIZE: u64 = 0x1000;
172
173// PMU PPI interrupt, same as qemu
174const AARCH64_PMU_IRQ: u32 = 7;
175
176// VCPU stall detector interrupt
177const AARCH64_VMWDT_IRQ: u32 = 15;
178
179enum PayloadType {
180    Bios {
181        entry: GuestAddress,
182        image_size: u64,
183    },
184    Kernel(LoadedKernel),
185}
186
187impl PayloadType {
188    fn entry(&self) -> GuestAddress {
189        match self {
190            Self::Bios {
191                entry,
192                image_size: _,
193            } => *entry,
194            Self::Kernel(k) => k.entry,
195        }
196    }
197
198    fn size(&self) -> u64 {
199        match self {
200            Self::Bios {
201                entry: _,
202                image_size,
203            } => *image_size,
204            Self::Kernel(k) => k.size,
205        }
206    }
207
208    fn address_range(&self) -> AddressRange {
209        match self {
210            Self::Bios { entry, image_size } => {
211                AddressRange::from_start_and_size(entry.offset(), *image_size)
212                    .expect("invalid BIOS address range")
213            }
214            Self::Kernel(k) => {
215                // TODO: b/389759119: use `k.address_range` to include regions that are present in
216                // memory but not in the original image file (e.g. `.bss` section).
217                AddressRange::from_start_and_size(k.entry.offset(), k.size)
218                    .expect("invalid kernel address range")
219            }
220        }
221    }
222}
223
224// When static swiotlb allocation is required, returns the address it should be allocated at.
225// Otherwise, returns None.
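//
// Illustrative example (hypothetical sizes, not taken from the code): with 2 GiB
// of RAM (memory_size = 0x8000_0000) and a 64 MiB swiotlb (swiotlb_size =
// 0x400_0000), a hypervisor that requires static allocation gets
// Some(GuestAddress(0x8000_0000 + 0x8000_0000 - 0x400_0000)), i.e. 0xfc00_0000.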
226fn get_swiotlb_addr(
227    memory_size: u64,
228    swiotlb_size: u64,
229    hypervisor: &(impl Hypervisor + ?Sized),
230) -> Option<GuestAddress> {
231    if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
232        Some(GuestAddress(
233            AARCH64_PHYS_MEM_START + memory_size - swiotlb_size,
234        ))
235    } else {
236        None
237    }
238}
239
240#[sorted]
241#[derive(Error, Debug)]
242pub enum Error {
243    #[error("failed to allocate IRQ number")]
244    AllocateIrq,
245    #[error("bios could not be loaded: {0}")]
246    BiosLoadFailure(arch::LoadImageError),
247    #[error("failed to build arm pvtime memory: {0}")]
248    BuildPvtimeError(base::MmapError),
249    #[error("unable to clone an Event: {0}")]
250    CloneEvent(base::Error),
251    #[error("failed to clone IRQ chip: {0}")]
252    CloneIrqChip(base::Error),
253    #[error("the given kernel command line was invalid: {0}")]
254    Cmdline(kernel_cmdline::Error),
255    #[error("bad PCI CAM configuration: {0}")]
256    ConfigurePciCam(String),
257    #[error("bad PCI mem configuration: {0}")]
258    ConfigurePciMem(String),
259    #[error("failed to configure CPU Frequencies: {0}")]
260    CpuFrequencies(base::Error),
261    #[error("failed to configure CPU topology: {0}")]
262    CpuTopology(base::Error),
263    #[error("unable to create battery devices: {0}")]
264    CreateBatDevices(arch::DeviceRegistrationError),
265    #[error("unable to make an Event: {0}")]
266    CreateEvent(base::Error),
267    #[error("FDT could not be created: {0}")]
268    CreateFdt(cros_fdt::Error),
269    #[error("failed to create GIC: {0}")]
270    CreateGICFailure(base::Error),
271    #[error("failed to create a PCI root hub: {0}")]
272    CreatePciRoot(arch::DeviceRegistrationError),
273    #[error("failed to create platform bus: {0}")]
274    CreatePlatformBus(arch::DeviceRegistrationError),
275    #[error("unable to create serial devices: {0}")]
276    CreateSerialDevices(arch::DeviceRegistrationError),
277    #[error("failed to create socket: {0}")]
278    CreateSocket(io::Error),
279    #[error("failed to create tube: {0}")]
280    CreateTube(base::TubeError),
281    #[error("failed to create VCPU: {0}")]
282    CreateVcpu(base::Error),
283    #[error("unable to create vm watchdog timer device: {0}")]
284    CreateVmwdtDevice(anyhow::Error),
285    #[error("custom pVM firmware could not be loaded: {0}")]
286    CustomPvmFwLoadFailure(arch::LoadImageError),
287    #[error("vm created wrong kind of vcpu")]
288    DowncastVcpu,
289    #[error("failed to enable singlestep execution: {0}")]
290    EnableSinglestep(base::Error),
291    #[error("failed to finalize IRQ chip: {0}")]
292    FinalizeIrqChip(base::Error),
293    #[error("failed to get HW breakpoint count: {0}")]
294    GetMaxHwBreakPoint(base::Error),
295    #[error("failed to get PSCI version: {0}")]
296    GetPsciVersion(base::Error),
297    #[error("failed to get serial cmdline: {0}")]
298    GetSerialCmdline(GetSerialCmdlineError),
299    #[error("failed to initialize arm pvtime: {0}")]
300    InitPvtimeError(base::Error),
301    #[error("initrd could not be loaded: {0}")]
302    InitrdLoadFailure(arch::LoadImageError),
    #[error("failed to initialize virtual machine: {0}")]
304    InitVmError(anyhow::Error),
305    #[error("kernel could not be loaded: {0}")]
306    KernelLoadFailure(kernel_loader::Error),
    #[error("error loading kernel from ELF image: {0}")]
308    LoadElfKernel(kernel_loader::Error),
309    #[error("failed to map arm pvtime memory: {0}")]
310    MapPvtimeError(base::Error),
311    #[error("pVM firmware could not be loaded: {0}")]
312    PvmFwLoadFailure(base::Error),
313    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
314    RamoopsAddress(u64, u64),
315    #[error("error reading guest memory: {0}")]
316    ReadGuestMemory(vm_memory::GuestMemoryError),
317    #[error("error reading CPU register: {0}")]
318    ReadReg(base::Error),
319    #[error("error reading CPU registers: {0}")]
320    ReadRegs(base::Error),
321    #[error("failed to register irq fd: {0}")]
322    RegisterIrqfd(base::Error),
323    #[error("error registering PCI bus: {0}")]
324    RegisterPci(BusError),
325    #[error("error registering virtual cpufreq device: {0}")]
326    RegisterVirtCpufreq(BusError),
327    #[error("error registering virtual socket device: {0}")]
328    RegisterVsock(arch::DeviceRegistrationError),
329    #[error("failed to set device attr: {0}")]
330    SetDeviceAttr(base::Error),
331    #[error("failed to set a hardware breakpoint: {0}")]
332    SetHwBreakpoint(base::Error),
333    #[error("failed to set register: {0}")]
334    SetReg(base::Error),
335    #[error("failed to set up guest memory: {0}")]
336    SetupGuestMemory(GuestMemoryError),
337    #[error("this function isn't supported")]
338    Unsupported,
339    #[error("failed to initialize VCPU: {0}")]
340    VcpuInit(base::Error),
341    #[error("error writing guest memory: {0}")]
342    WriteGuestMemory(GuestMemoryError),
343    #[error("error writing CPU register: {0}")]
344    WriteReg(base::Error),
345    #[error("error writing CPU registers: {0}")]
346    WriteRegs(base::Error),
347}
348
349pub type Result<T> = std::result::Result<T, Error>;
350
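/// Loads the kernel into guest memory at `kernel_start`, trying the supported
/// formats in order: an ELF image first, then an LZ4-compressed arm64 Image,
/// and finally a raw arm64 Image. Only the error from the last attempt is
/// reported.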
351fn load_kernel(
352    guest_mem: &GuestMemory,
353    kernel_start: GuestAddress,
354    mut kernel_image: &mut File,
355) -> Result<LoadedKernel> {
356    if let Ok(elf_kernel) = kernel_loader::load_elf(
357        guest_mem,
358        kernel_start,
359        &mut kernel_image,
360        AARCH64_PHYS_MEM_START,
361    ) {
362        return Ok(elf_kernel);
363    }
364
365    if let Ok(lz4_kernel) =
366        kernel_loader::load_arm64_kernel_lz4(guest_mem, kernel_start, &mut kernel_image)
367    {
368        return Ok(lz4_kernel);
369    }
370
371    kernel_loader::load_arm64_kernel(guest_mem, kernel_start, kernel_image)
372        .map_err(Error::KernelLoadFailure)
373}
374
375pub struct AArch64;
376
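/// Returns the span covered by one page full of page table entries, used as
/// the alignment for the main RAM region.
///
/// Worked example (host page size assumed for illustration): with 4 KiB pages,
/// ptes_per_page = 4096 / 8 = 512, so the block size is 4096 * 512 = 2 MiB;
/// with 16 KiB pages it would be 32 MiB.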
377fn get_block_size() -> u64 {
378    let page_size = base::pagesize();
    // Each page table entry is 8 bytes, so one page can hold (page_size / 8)
    // entries.
381    let ptes_per_page = page_size / 8;
382    let block_size = page_size * ptes_per_page;
383
384    block_size as u64
385}
386
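/// Returns the affinity fields of the MPIDR value for the vcpu at `index`, or
/// `None` if the vcpu does not exist or the register cannot be read.
///
/// The mask keeps Aff3 (bits [39:32]) and Aff2..Aff0 (bits [23:0]) and clears
/// the non-affinity bits in between.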
387fn get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64> {
388    const MPIDR_AFF_MASK: u64 = 0xff_00ff_ffff;
389
390    Some(vcpus.get(index)?.get_mpidr().ok()? & MPIDR_AFF_MASK)
391}
392
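/// Returns the size of the main RAM region, leaving room for a statically
/// allocated swiotlb region when the hypervisor requires one.
///
/// Hypothetical example: with `memory_size` = 4 GiB and `swiotlb` =
/// Some(64 MiB) on such a hypervisor, this returns 4 GiB - 64 MiB.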
393fn main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64 {
394    // Static swiotlb is allocated from the end of RAM as a separate memory region, so, if
395    // enabled, make the RAM memory region smaller to leave room for it.
396    let mut main_memory_size = components.memory_size;
397    if let Some(size) = components.swiotlb {
398        if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
399            main_memory_size -= size;
400        }
401    }
402    main_memory_size
403}
404
405pub struct ArchMemoryLayout {
406    pci_cam: AddressRange,
407    pci_mem: AddressRange,
408}
409
410impl arch::LinuxArch for AArch64 {
411    type Error = Error;
412    type ArchMemoryLayout = ArchMemoryLayout;
413
414    fn arch_memory_layout(
415        components: &VmComponents,
416    ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
417        let (pci_cam_start, pci_cam_size) = match components.pci_config.cam {
418            Some(MemoryRegionConfig { start, size }) => {
419                (start, size.unwrap_or(AARCH64_PCI_CAM_SIZE_DEFAULT))
420            }
421            None => (AARCH64_PCI_CAM_BASE_DEFAULT, AARCH64_PCI_CAM_SIZE_DEFAULT),
422        };
423        // TODO: Make the PCI slot allocator aware of the CAM size so we can remove this check.
424        if pci_cam_size != AARCH64_PCI_CAM_SIZE_DEFAULT {
425            return Err(Error::ConfigurePciCam(format!(
426                "PCI CAM size must be {AARCH64_PCI_CAM_SIZE_DEFAULT:#x}, got {pci_cam_size:#x}"
427            )));
428        }
429        let pci_cam = AddressRange::from_start_and_size(pci_cam_start, pci_cam_size).ok_or(
430            Error::ConfigurePciCam("PCI CAM region overflowed".to_string()),
431        )?;
432        if pci_cam.end >= AARCH64_PHYS_MEM_START {
433            return Err(Error::ConfigurePciCam(format!(
434                "PCI CAM ({pci_cam:?}) must be before start of RAM ({AARCH64_PHYS_MEM_START:#x})"
435            )));
436        }
437
438        let pci_mem = match components.pci_config.mem {
439            Some(MemoryRegionConfig { start, size }) => AddressRange::from_start_and_size(
440                start,
441                size.unwrap_or(AARCH64_PCI_MEM_SIZE_DEFAULT),
442            )
443            .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
444            None => AddressRange::from_start_and_size(
445                AARCH64_PCI_MEM_BASE_DEFAULT,
446                AARCH64_PCI_MEM_SIZE_DEFAULT,
447            )
448            .unwrap(),
449        };
450
451        Ok(ArchMemoryLayout { pci_cam, pci_mem })
452    }
453
    /// Returns a Vec of the valid memory regions (base address, size, options).
    /// These should be used to configure the GuestMemory structure for the platform.
456    fn guest_memory_layout(
457        components: &VmComponents,
458        _arch_memory_layout: &Self::ArchMemoryLayout,
459        hypervisor: &impl Hypervisor,
460    ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
461        let main_memory_size = main_memory_size(components, hypervisor);
462
463        let mut memory_regions = vec![(
464            GuestAddress(AARCH64_PHYS_MEM_START),
465            main_memory_size,
466            MemoryRegionOptions::new().align(get_block_size()),
467        )];
468
469        // Allocate memory for the pVM firmware.
470        if components.hv_cfg.protection_type.runs_firmware() {
471            memory_regions.push((
472                GuestAddress(AARCH64_PROTECTED_VM_FW_START),
473                AARCH64_PROTECTED_VM_FW_MAX_SIZE,
474                MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
475            ));
476        }
477
478        if let Some(size) = components.swiotlb {
479            if let Some(addr) = get_swiotlb_addr(components.memory_size, size, hypervisor) {
480                memory_regions.push((
481                    addr,
482                    size,
483                    MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
484                ));
485            }
486        }
487
488        Ok(memory_regions)
489    }
490
491    fn get_system_allocator_config<V: Vm>(
492        vm: &V,
493        arch_memory_layout: &Self::ArchMemoryLayout,
494    ) -> SystemAllocatorConfig {
495        let guest_phys_end = 1u64 << vm.get_guest_phys_addr_bits();
496        // The platform MMIO region is immediately past the end of RAM.
497        let plat_mmio_base = vm.get_memory().end_addr().offset();
498        let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
499        // The high MMIO region is the rest of the address space after the platform MMIO region.
500        let high_mmio_base = plat_mmio_base + plat_mmio_size;
501        let high_mmio_size = guest_phys_end
502            .checked_sub(high_mmio_base)
503            .unwrap_or_else(|| {
504                panic!("guest_phys_end {guest_phys_end:#x} < high_mmio_base {high_mmio_base:#x}",);
505            });
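        // Hypothetical layout (sizes chosen for illustration): with 40 guest
        // physical address bits and RAM ending at 0x1_0000_0000, the platform
        // MMIO region is [0x1_0000_0000, 0x1_0080_0000) and the high MMIO
        // region covers the rest of the space up to 1 << 40.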
506        SystemAllocatorConfig {
507            io: None,
508            low_mmio: arch_memory_layout.pci_mem,
509            high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
510                .expect("invalid high mmio region"),
511            platform_mmio: Some(
512                AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
513                    .expect("invalid platform mmio region"),
514            ),
515            first_irq: AARCH64_IRQ_BASE,
516        }
517    }
518
519    fn build_vm<V, Vcpu>(
520        mut components: VmComponents,
521        arch_memory_layout: &Self::ArchMemoryLayout,
522        _vm_evt_wrtube: &SendTube,
523        system_allocator: &mut SystemAllocator,
524        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
525        serial_jail: Option<Minijail>,
526        (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
527        mut vm: V,
528        ramoops_region: Option<arch::pstore::RamoopsRegion>,
529        devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
530        irq_chip: &mut dyn IrqChipAArch64,
531        vcpu_ids: &mut Vec<usize>,
532        dump_device_tree_blob: Option<PathBuf>,
533        _debugcon_jail: Option<Minijail>,
534        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
535        _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
536        device_tree_overlays: Vec<DtbOverlay>,
537        fdt_position: Option<FdtPosition>,
538        no_pmu: bool,
539    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
540    where
541        V: VmAArch64,
542        Vcpu: VcpuAArch64,
543    {
544        let has_bios = matches!(components.vm_image, VmImage::Bios(_));
545        let mem = vm.get_memory().clone();
546
547        let main_memory_size = main_memory_size(&components, vm.get_hypervisor());
548
549        // Load pvmfw early because it tells the hypervisor this is a pVM which affects
550        // the behavior of calls like Hypervisor::check_capability
551        if components.hv_cfg.protection_type.needs_firmware_loaded() {
552            arch::load_image(
553                &mem,
554                &mut components
555                    .pvm_fw
556                    .expect("pvmfw must be available if ProtectionType loads it"),
557                GuestAddress(AARCH64_PROTECTED_VM_FW_START),
558                AARCH64_PROTECTED_VM_FW_MAX_SIZE,
559            )
560            .map_err(Error::CustomPvmFwLoadFailure)?;
561        } else if components.hv_cfg.protection_type.runs_firmware() {
562            // Tell the hypervisor to load the pVM firmware.
563            vm.load_protected_vm_firmware(
564                GuestAddress(AARCH64_PROTECTED_VM_FW_START),
565                AARCH64_PROTECTED_VM_FW_MAX_SIZE,
566            )
567            .map_err(Error::PvmFwLoadFailure)?;
568        }
569
570        let fdt_position = fdt_position.unwrap_or(if has_bios {
571            FdtPosition::Start
572        } else {
573            FdtPosition::End
574        });
575        let payload_address = match fdt_position {
            // If the FDT is at the start of RAM, the payload needs to go somewhere after it.
577            FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_MAX_SIZE),
578            // Otherwise, put the payload at the start of RAM.
579            FdtPosition::End | FdtPosition::AfterPayload => GuestAddress(AARCH64_PHYS_MEM_START),
580        };
581
        // Keep image loading separate from the rest of the setup so that a
        // loading failure produces a specific error.
584        let mut initrd = None;
585        let (payload, payload_end_address) = match components.vm_image {
586            VmImage::Bios(ref mut bios) => {
587                let image_size = arch::load_image(&mem, bios, payload_address, u64::MAX)
588                    .map_err(Error::BiosLoadFailure)?;
589                (
590                    PayloadType::Bios {
591                        entry: payload_address,
592                        image_size: image_size as u64,
593                    },
594                    payload_address
595                        .checked_add(image_size.try_into().unwrap())
596                        .and_then(|end| end.checked_sub(1))
597                        .unwrap(),
598                )
599            }
600            VmImage::Kernel(ref mut kernel_image) => {
601                let loaded_kernel = load_kernel(&mem, payload_address, kernel_image)?;
602                let kernel_end = loaded_kernel.address_range.end;
603                let mut payload_end = GuestAddress(kernel_end);
604                initrd = match components.initrd_image {
605                    Some(initrd_file) => {
606                        let mut initrd_file = initrd_file;
607                        let initrd_addr = (kernel_end + 1 + (AARCH64_INITRD_ALIGN - 1))
608                            & !(AARCH64_INITRD_ALIGN - 1);
609                        let initrd_max_size =
610                            main_memory_size.saturating_sub(initrd_addr - AARCH64_PHYS_MEM_START);
611                        let initrd_addr = GuestAddress(initrd_addr);
612                        let initrd_size =
613                            arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
614                                .map_err(Error::InitrdLoadFailure)?;
615                        payload_end = initrd_addr
616                            .checked_add(initrd_size.try_into().unwrap())
617                            .and_then(|end| end.checked_sub(1))
618                            .unwrap();
619                        Some((initrd_addr, initrd_size))
620                    }
621                    None => None,
622                };
623                (PayloadType::Kernel(loaded_kernel), payload_end)
624            }
625        };
626
627        let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + main_memory_size);
628
629        let fdt_address = match fdt_position {
630            FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START),
631            FdtPosition::End => {
632                let addr = memory_end
633                    .checked_sub(AARCH64_FDT_MAX_SIZE)
634                    .expect("Not enough memory for FDT")
635                    .align_down(AARCH64_FDT_ALIGN);
636                assert!(addr > payload_end_address, "Not enough memory for FDT");
637                addr
638            }
639            FdtPosition::AfterPayload => payload_end_address
640                .checked_add(1)
641                .and_then(|addr| addr.align(AARCH64_FDT_ALIGN))
642                .expect("Not enough memory for FDT"),
643        };
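        // Illustrative example (hypothetical sizes): with 1 GiB of RAM,
        // `memory_end` is 0xc000_0000, so `FdtPosition::End` places the FDT at
        // align_down(0xc000_0000 - 0x20_0000, 0x20_0000) = 0xbfe0_0000, as long
        // as the payload ends below that address.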
644
645        let mut use_pmu = vm.check_capability(VmCap::ArmPmuV3);
646        use_pmu &= !no_pmu;
647        let vcpu_count = components.vcpu_count;
648        let mut has_pvtime = true;
649        let mut vcpus = Vec::with_capacity(vcpu_count);
650        let mut vcpu_init = Vec::with_capacity(vcpu_count);
651        for vcpu_id in 0..vcpu_count {
652            let vcpu: Vcpu = *vm
653                .create_vcpu(vcpu_id)
654                .map_err(Error::CreateVcpu)?
655                .downcast::<Vcpu>()
656                .map_err(|_| Error::DowncastVcpu)?;
657            let per_vcpu_init = if vm
658                .get_hypervisor()
659                .check_capability(HypervisorCap::HypervisorInitializedBootContext)
660            {
661                // No registers are initialized: VcpuInitAArch64.regs is an empty BTreeMap
662                Default::default()
663            } else {
664                Self::vcpu_init(
665                    vcpu_id,
666                    &payload,
667                    fdt_address,
668                    components.hv_cfg.protection_type,
669                    components.boot_cpu,
670                )
671            };
672            has_pvtime &= vcpu.has_pvtime_support();
673            vcpus.push(vcpu);
674            vcpu_ids.push(vcpu_id);
675            vcpu_init.push(per_vcpu_init);
676        }
677
678        let enable_sve = if components.sve_config.auto {
679            vm.check_capability(VmCap::Sve)
680        } else {
681            false
682        };
683
684        // Initialize Vcpus after all Vcpu objects have been created.
685        for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
686            let features = &Self::vcpu_features(vcpu_id, use_pmu, components.boot_cpu, enable_sve);
687            vcpu.init(features).map_err(Error::VcpuInit)?;
688        }
689
690        irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
691
692        if has_pvtime {
693            let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
694                .build()
695                .map_err(Error::BuildPvtimeError)?;
696            vm.add_memory_region(
697                GuestAddress(AARCH64_PVTIME_IPA_START),
698                Box::new(pvtime_mem),
699                false,
700                false,
701                MemCacheType::CacheCoherent,
702            )
703            .map_err(Error::MapPvtimeError)?;
704        }
705
706        for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
707            use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
708            if has_pvtime {
709                vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
710                    .map_err(Error::InitPvtimeError)?;
711            }
712        }
713
714        let mmio_bus = Arc::new(devices::Bus::new(BusType::Mmio));
715
        // ARM doesn't use a port I/O bus the way x86 does, so just create an empty bus.
717        let io_bus = Arc::new(devices::Bus::new(BusType::Io));
718
        // Event used by PMDevice to notify crosvm that
        // the guest OS is trying to suspend.
721        let (suspend_tube_send, suspend_tube_recv) =
722            Tube::directional_pair().map_err(Error::CreateTube)?;
723        let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));
724
725        let (pci_devices, others): (Vec<_>, Vec<_>) = devs
726            .into_iter()
727            .partition(|(dev, _)| dev.as_pci_device().is_some());
728
729        let pci_devices = pci_devices
730            .into_iter()
731            .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
732            .collect();
733        let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
734            arch::generate_pci_root(
735                pci_devices,
736                irq_chip.as_irq_chip_mut(),
737                mmio_bus.clone(),
738                GuestAddress(arch_memory_layout.pci_cam.start),
739                8,
740                io_bus.clone(),
741                system_allocator,
742                &mut vm,
743                (devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
744                None,
745                #[cfg(feature = "swap")]
746                swap_controller,
747            )
748            .map_err(Error::CreatePciRoot)?;
749
750        let pci_root = Arc::new(Mutex::new(pci));
751        let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
752        let (platform_devices, _others): (Vec<_>, Vec<_>) = others
753            .into_iter()
754            .partition(|(dev, _)| dev.as_platform_device().is_some());
755
756        let platform_devices = platform_devices
757            .into_iter()
758            .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
759            .collect();
760        let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
761            arch::sys::linux::generate_platform_bus(
762                platform_devices,
763                irq_chip.as_irq_chip_mut(),
764                &mmio_bus,
765                system_allocator,
766                &mut vm,
767                #[cfg(feature = "swap")]
768                swap_controller,
769                components.hv_cfg.protection_type,
770            )
771            .map_err(Error::CreatePlatformBus)?;
772        pid_debug_label_map.append(&mut platform_pid_debug_label_map);
773
774        let (vmwdt_host_tube, vmwdt_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
775        Self::add_arch_devs(
776            irq_chip.as_irq_chip_mut(),
777            &mmio_bus,
778            vcpu_count,
779            _vm_evt_wrtube,
780            vmwdt_control_tube,
781        )?;
782
783        let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
784        let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
785        let serial_devices = arch::add_serial_devices(
786            components.hv_cfg.protection_type,
787            &mmio_bus,
788            (AARCH64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
789            (AARCH64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
790            serial_parameters,
791            serial_jail,
792            #[cfg(feature = "swap")]
793            swap_controller,
794        )
795        .map_err(Error::CreateSerialDevices)?;
796
797        let source = IrqEventSource {
798            device_id: Serial::device_id(),
799            queue_id: 0,
800            device_name: Serial::debug_label(),
801        };
802        irq_chip
803            .register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
804            .map_err(Error::RegisterIrqfd)?;
805        irq_chip
806            .register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
807            .map_err(Error::RegisterIrqfd)?;
808
809        mmio_bus
810            .insert(
811                pci_bus,
812                arch_memory_layout.pci_cam.start,
813                arch_memory_layout.pci_cam.len().unwrap(),
814            )
815            .map_err(Error::RegisterPci)?;
816
817        let (vcpufreq_host_tube, vcpufreq_control_tube) =
818            Tube::pair().map_err(Error::CreateTube)?;
819        let vcpufreq_shared_tube = Arc::new(Mutex::new(vcpufreq_control_tube));
820        #[cfg(any(target_os = "android", target_os = "linux"))]
821        if !components.cpu_frequencies.is_empty() {
822            let mut freq_domain_vcpus: BTreeMap<u32, Vec<usize>> = BTreeMap::new();
823            let mut freq_domain_perfs: BTreeMap<u32, Arc<AtomicU32>> = BTreeMap::new();
824            let mut vcpu_affinities: Vec<u32> = Vec::new();
825            for vcpu in 0..vcpu_count {
826                let freq_domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
827                freq_domain_vcpus.entry(freq_domain).or_default().push(vcpu);
828                let vcpu_affinity = match components.vcpu_affinity.clone() {
829                    Some(VcpuAffinity::Global(v)) => v,
830                    Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(),
831                    None => panic!("vcpu_affinity needs to be set for VirtCpufreq"),
832                };
833                vcpu_affinities.push(vcpu_affinity[0].try_into().unwrap());
834            }
835            for domain in freq_domain_vcpus.keys() {
836                let domain_perf = Arc::new(AtomicU32::new(0));
837                freq_domain_perfs.insert(*domain, domain_perf);
838            }
839            let largest_vcpu_affinity_idx = *vcpu_affinities.iter().max().unwrap() as usize;
840            for (vcpu, vcpu_affinity) in vcpu_affinities.iter().enumerate() {
841                let mut virtfreq_size = AARCH64_VIRTFREQ_SIZE;
842                if components.virt_cpufreq_v2 {
843                    let domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
844                    virtfreq_size = AARCH64_VIRTFREQ_V2_SIZE;
845                    let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreqV2::new(
846                        *vcpu_affinity,
847                        components.cpu_frequencies.get(&vcpu).unwrap().clone(),
848                        components.vcpu_domain_paths.get(&vcpu).cloned(),
849                        domain,
850                        *components.normalized_cpu_ipc_ratios.get(&vcpu).unwrap(),
851                        largest_vcpu_affinity_idx,
852                        vcpufreq_shared_tube.clone(),
853                        freq_domain_vcpus.get(&domain).unwrap().clone(),
854                        freq_domain_perfs.get(&domain).unwrap().clone(),
855                    )));
856                    mmio_bus
857                        .insert(
858                            virt_cpufreq,
859                            AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
860                            virtfreq_size,
861                        )
862                        .map_err(Error::RegisterVirtCpufreq)?;
863                } else {
864                    let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new(
865                        *vcpu_affinity,
866                        *components.cpu_capacity.get(&vcpu).unwrap(),
867                        *components
868                            .cpu_frequencies
869                            .get(&vcpu)
870                            .unwrap()
871                            .iter()
872                            .max()
873                            .unwrap(),
874                    )));
875                    mmio_bus
876                        .insert(
877                            virt_cpufreq,
878                            AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
879                            virtfreq_size,
880                        )
881                        .map_err(Error::RegisterVirtCpufreq)?;
882                }
883
884                if vcpu as u64 * AARCH64_VIRTFREQ_SIZE + virtfreq_size > AARCH64_VIRTFREQ_MAXSIZE {
885                    panic!("Exceeded maximum number of virt cpufreq devices");
886                }
887            }
888        }
889
890        let mut cmdline = Self::get_base_linux_cmdline();
891        get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
892            .map_err(Error::GetSerialCmdline)?;
893        for param in components.extra_kernel_params {
894            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
895        }
896
897        if let Some(ramoops_region) = ramoops_region {
898            arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
899                .map_err(Error::Cmdline)?;
900        }
901
902        let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;
903
904        let pci_cfg = fdt::PciConfigRegion {
905            base: arch_memory_layout.pci_cam.start,
906            size: arch_memory_layout.pci_cam.len().unwrap(),
907        };
908
909        let mut pci_ranges: Vec<fdt::PciRange> = Vec::new();
910
911        let mut add_pci_ranges =
912            |alloc: &AddressAllocator, space: PciAddressSpace, prefetchable: bool| {
913                pci_ranges.extend(alloc.pools().iter().map(|range| fdt::PciRange {
914                    space,
915                    bus_address: range.start,
916                    cpu_physical_address: range.start,
917                    size: range.len().unwrap(),
918                    prefetchable,
919                }));
920            };
921
922        add_pci_ranges(
923            system_allocator.mmio_allocator(MmioType::Low),
924            PciAddressSpace::Memory,
925            false, // prefetchable
926        );
927        add_pci_ranges(
928            system_allocator.mmio_allocator(MmioType::High),
929            PciAddressSpace::Memory64,
930            true, // prefetchable
931        );
932
933        let (bat_control, bat_mmio_base_and_irq) = match bat_type {
934            Some(BatteryType::Goldfish) => {
935                let bat_irq = AARCH64_BAT_IRQ;
936
                // A dummy AML buffer; AArch64 crosvm doesn't use ACPI.
938                let mut amls = Vec::new();
939                let (control_tube, mmio_base) = arch::sys::linux::add_goldfish_battery(
940                    &mut amls,
941                    bat_jail,
942                    &mmio_bus,
943                    irq_chip.as_irq_chip_mut(),
944                    bat_irq,
945                    system_allocator,
946                    #[cfg(feature = "swap")]
947                    swap_controller,
948                )
949                .map_err(Error::CreateBatDevices)?;
950                (
951                    Some(BatControl {
952                        type_: BatteryType::Goldfish,
953                        control_tube,
954                    }),
955                    Some((mmio_base, bat_irq)),
956                )
957            }
958            None => (None, None),
959        };
960
961        let vmwdt_cfg = fdt::VmWdtConfig {
962            base: AARCH64_VMWDT_ADDR,
963            size: AARCH64_VMWDT_SIZE,
964            clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
965            timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
966        };
967
968        fdt::create_fdt(
969            AARCH64_FDT_MAX_SIZE as usize,
970            &mem,
971            pci_irqs,
972            pci_cfg,
973            &pci_ranges,
974            dev_resources,
975            vcpu_count as u32,
976            &|n| get_vcpu_mpidr_aff(&vcpus, n),
977            components.cpu_clusters,
978            components.cpu_capacity,
979            components.cpu_frequencies,
980            fdt_address,
981            cmdline
982                .as_str_with_max_len(AARCH64_CMDLINE_MAX_SIZE - 1)
983                .map_err(Error::Cmdline)?,
984            payload.address_range(),
985            initrd,
986            components.android_fstab,
987            irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
988            irq_chip.has_vgic_its(),
989            use_pmu,
990            psci_version,
991            components.swiotlb.map(|size| {
992                (
993                    get_swiotlb_addr(components.memory_size, size, vm.get_hypervisor()),
994                    size,
995                )
996            }),
997            bat_mmio_base_and_irq,
998            vmwdt_cfg,
999            dump_device_tree_blob,
1000            &|writer, phandles| vm.create_fdt(writer, phandles),
1001            components.dynamic_power_coefficient,
1002            device_tree_overlays,
1003            &serial_devices,
1004            components.virt_cpufreq_v2,
1005        )
1006        .map_err(Error::CreateFdt)?;
1007
1008        vm.init_arch(
1009            payload.entry(),
1010            fdt_address,
1011            AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
1012        )
1013        .map_err(Error::InitVmError)?;
1014
1015        let vm_request_tubes = vec![vmwdt_host_tube, vcpufreq_host_tube];
1016
1017        Ok(RunnableLinuxVm {
1018            vm,
1019            vcpu_count,
1020            vcpus: Some(vcpus),
1021            vcpu_init,
1022            vcpu_affinity: components.vcpu_affinity,
1023            no_smt: components.no_smt,
1024            irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
1025            io_bus,
1026            mmio_bus,
1027            pid_debug_label_map,
1028            suspend_tube: (suspend_tube_send, suspend_tube_recv),
1029            rt_cpus: components.rt_cpus,
1030            delay_rt: components.delay_rt,
1031            bat_control,
1032            pm: None,
1033            resume_notify_devices: Vec::new(),
1034            root_config: pci_root,
1035            platform_devices,
1036            hotplug_bus: BTreeMap::new(),
1037            devices_thread: None,
1038            vm_request_tubes,
1039        })
1040    }
1041
1042    fn configure_vcpu<V: Vm>(
1043        _vm: &V,
1044        _hypervisor: &dyn Hypervisor,
1045        _irq_chip: &mut dyn IrqChipAArch64,
1046        vcpu: &mut dyn VcpuAArch64,
1047        vcpu_init: VcpuInitAArch64,
1048        _vcpu_id: usize,
1049        _num_cpus: usize,
1050        _cpu_config: Option<CpuConfigAArch64>,
1051    ) -> std::result::Result<(), Self::Error> {
1052        for (reg, value) in vcpu_init.regs.iter() {
1053            vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
1054        }
1055        Ok(())
1056    }
1057
1058    fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
1059        _linux: &mut RunnableLinuxVm<V, Vcpu>,
1060        _device: Box<dyn PciDevice>,
1061        _minijail: Option<Minijail>,
1062        _resources: &mut SystemAllocator,
1063        _tube: &mpsc::Sender<PciRootCommand>,
1064        #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
1065    ) -> std::result::Result<PciAddress, Self::Error> {
        // PCI device hotplug hasn't been verified on AArch64, so report it as unsupported here.
1067        Err(Error::Unsupported)
1068    }
1069
1070    fn get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1071        Ok(Self::collect_for_each_cpu(base::logical_core_max_freq_khz)
1072            .map_err(Error::CpuFrequencies)?
1073            .into_iter()
1074            .enumerate()
1075            .collect())
1076    }
1077
1078    fn get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>
1079    {
1080        Ok(
1081            Self::collect_for_each_cpu(base::logical_core_frequencies_khz)
1082                .map_err(Error::CpuFrequencies)?
1083                .into_iter()
1084                .enumerate()
1085                .collect(),
1086        )
1087    }
1088
1089    // Returns a (cpu_id -> value) map of the DMIPS/MHz capacities of logical cores
1090    // in the host system.
1091    fn get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1092        Ok(Self::collect_for_each_cpu(base::logical_core_capacity)
1093            .map_err(Error::CpuTopology)?
1094            .into_iter()
1095            .enumerate()
1096            .collect())
1097    }
1098
1099    // Creates CPU cluster mask for each CPU in the host system.
1100    fn get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error> {
1101        let cluster_ids = Self::collect_for_each_cpu(base::logical_core_cluster_id)
1102            .map_err(Error::CpuTopology)?;
1103        let mut unique_clusters: Vec<CpuSet> = cluster_ids
1104            .iter()
1105            .map(|&vcpu_cluster_id| {
1106                cluster_ids
1107                    .iter()
1108                    .enumerate()
1109                    .filter(|(_, &cpu_cluster_id)| vcpu_cluster_id == cpu_cluster_id)
1110                    .map(|(cpu_id, _)| cpu_id)
1111                    .collect()
1112            })
1113            .collect();
1114        unique_clusters.sort_unstable();
1115        unique_clusters.dedup();
1116        Ok(unique_clusters)
1117    }
1118}
1119
1120#[cfg(feature = "gdb")]
1121impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
1122    type Error = Error;
1123
1124    fn read_memory(
1125        _vcpu: &T,
1126        guest_mem: &GuestMemory,
1127        vaddr: GuestAddress,
1128        len: usize,
1129    ) -> Result<Vec<u8>> {
1130        let mut buf = vec![0; len];
1131
1132        guest_mem
1133            .read_exact_at_addr(&mut buf, vaddr)
1134            .map_err(Error::ReadGuestMemory)?;
1135
1136        Ok(buf)
1137    }
1138
1139    fn write_memory(
1140        _vcpu: &T,
1141        guest_mem: &GuestMemory,
1142        vaddr: GuestAddress,
1143        buf: &[u8],
1144    ) -> Result<()> {
1145        guest_mem
1146            .write_all_at_addr(buf, vaddr)
1147            .map_err(Error::WriteGuestMemory)
1148    }
1149
1150    fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
1151        let mut regs: <GdbArch as Arch>::Registers = Default::default();
1152        assert!(
1153            regs.x.len() == 31,
1154            "unexpected number of Xn general purpose registers"
1155        );
1156        for (i, reg) in regs.x.iter_mut().enumerate() {
1157            let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1158            *reg = vcpu
1159                .get_one_reg(VcpuRegAArch64::X(n))
1160                .map_err(Error::ReadReg)?;
1161        }
1162        regs.sp = vcpu
1163            .get_one_reg(VcpuRegAArch64::Sp)
1164            .map_err(Error::ReadReg)?;
1165        regs.pc = vcpu
1166            .get_one_reg(VcpuRegAArch64::Pc)
1167            .map_err(Error::ReadReg)?;
1168        // hypervisor API gives a 64-bit value for Pstate, but GDB wants a 32-bit "CPSR".
1169        regs.cpsr = vcpu
1170            .get_one_reg(VcpuRegAArch64::Pstate)
1171            .map_err(Error::ReadReg)? as u32;
1172        for (i, reg) in regs.v.iter_mut().enumerate() {
            let n = u8::try_from(i).expect("invalid Vn vector register index");
1174            *reg = vcpu.get_vector_reg(n).map_err(Error::ReadReg)?;
1175        }
1176        regs.fpcr = vcpu
1177            .get_one_reg(VcpuRegAArch64::System(aarch64_sys_reg::FPCR))
1178            .map_err(Error::ReadReg)? as u32;
1179        regs.fpsr = vcpu
1180            .get_one_reg(VcpuRegAArch64::System(aarch64_sys_reg::FPSR))
1181            .map_err(Error::ReadReg)? as u32;
1182
1183        Ok(regs)
1184    }
1185
1186    fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
1187        assert!(
1188            regs.x.len() == 31,
1189            "unexpected number of Xn general purpose registers"
1190        );
1191        for (i, reg) in regs.x.iter().enumerate() {
1192            let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1193            vcpu.set_one_reg(VcpuRegAArch64::X(n), *reg)
1194                .map_err(Error::WriteReg)?;
1195        }
1196        vcpu.set_one_reg(VcpuRegAArch64::Sp, regs.sp)
1197            .map_err(Error::WriteReg)?;
1198        vcpu.set_one_reg(VcpuRegAArch64::Pc, regs.pc)
1199            .map_err(Error::WriteReg)?;
1200        // GDB gives a 32-bit value for "CPSR", but hypervisor API wants a 64-bit Pstate.
1201        let pstate = vcpu
1202            .get_one_reg(VcpuRegAArch64::Pstate)
1203            .map_err(Error::ReadReg)?;
1204        let pstate = (pstate & 0xffff_ffff_0000_0000) | (regs.cpsr as u64);
1205        vcpu.set_one_reg(VcpuRegAArch64::Pstate, pstate)
1206            .map_err(Error::WriteReg)?;
1207        for (i, reg) in regs.v.iter().enumerate() {
            let n = u8::try_from(i).expect("invalid Vn vector register index");
1209            vcpu.set_vector_reg(n, *reg).map_err(Error::WriteReg)?;
1210        }
1211        vcpu.set_one_reg(
1212            VcpuRegAArch64::System(aarch64_sys_reg::FPCR),
1213            u64::from(regs.fpcr),
1214        )
1215        .map_err(Error::WriteReg)?;
1216        vcpu.set_one_reg(
1217            VcpuRegAArch64::System(aarch64_sys_reg::FPSR),
1218            u64::from(regs.fpsr),
1219        )
1220        .map_err(Error::WriteReg)?;
1221
1222        Ok(())
1223    }
1224
1225    fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
1226        let result = match reg_id {
1227            AArch64RegId::X(n) => vcpu
1228                .get_one_reg(VcpuRegAArch64::X(n))
1229                .map(|v| v.to_ne_bytes().to_vec()),
1230            AArch64RegId::Sp => vcpu
1231                .get_one_reg(VcpuRegAArch64::Sp)
1232                .map(|v| v.to_ne_bytes().to_vec()),
1233            AArch64RegId::Pc => vcpu
1234                .get_one_reg(VcpuRegAArch64::Pc)
1235                .map(|v| v.to_ne_bytes().to_vec()),
1236            AArch64RegId::Pstate => vcpu
1237                .get_one_reg(VcpuRegAArch64::Pstate)
1238                .map(|v| (v as u32).to_ne_bytes().to_vec()),
1239            AArch64RegId::V(n) => vcpu.get_vector_reg(n).map(|v| v.to_ne_bytes().to_vec()),
1240            AArch64RegId::System(op) => vcpu
1241                .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)))
1242                .map(|v| v.to_ne_bytes().to_vec()),
1243            _ => {
1244                base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1245                Err(base::Error::new(libc::EINVAL))
1246            }
1247        };
1248
1249        match result {
1250            Ok(bytes) => Ok(bytes),
1251            // ENOENT is returned when KVM is aware of the register but it is unavailable
1252            Err(e) if e.errno() == libc::ENOENT => Ok(Vec::new()),
1253            Err(e) => Err(Error::ReadReg(e)),
1254        }
1255    }
1256
1257    fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
1258        fn try_into_u32(data: &[u8]) -> Result<u32> {
1259            let s = data
1260                .get(..4)
1261                .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1262            let a = s
1263                .try_into()
1264                .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1265            Ok(u32::from_ne_bytes(a))
1266        }
1267
1268        fn try_into_u64(data: &[u8]) -> Result<u64> {
1269            let s = data
1270                .get(..8)
1271                .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1272            let a = s
1273                .try_into()
1274                .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1275            Ok(u64::from_ne_bytes(a))
1276        }
1277
1278        fn try_into_u128(data: &[u8]) -> Result<u128> {
1279            let s = data
1280                .get(..16)
1281                .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1282            let a = s
1283                .try_into()
1284                .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1285            Ok(u128::from_ne_bytes(a))
1286        }
1287
1288        match reg_id {
1289            AArch64RegId::X(n) => vcpu.set_one_reg(VcpuRegAArch64::X(n), try_into_u64(data)?),
1290            AArch64RegId::Sp => vcpu.set_one_reg(VcpuRegAArch64::Sp, try_into_u64(data)?),
1291            AArch64RegId::Pc => vcpu.set_one_reg(VcpuRegAArch64::Pc, try_into_u64(data)?),
1292            AArch64RegId::Pstate => {
1293                vcpu.set_one_reg(VcpuRegAArch64::Pstate, u64::from(try_into_u32(data)?))
1294            }
1295            AArch64RegId::V(n) => vcpu.set_vector_reg(n, try_into_u128(data)?),
1296            AArch64RegId::System(op) => vcpu.set_one_reg(
1297                VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)),
1298                try_into_u64(data)?,
1299            ),
1300            _ => {
1301                base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1302                Err(base::Error::new(libc::EINVAL))
1303            }
1304        }
1305        .map_err(Error::WriteReg)
1306    }
1307
1308    fn enable_singlestep(vcpu: &T) -> Result<()> {
1309        const SINGLE_STEP: bool = true;
1310        vcpu.set_guest_debug(&[], SINGLE_STEP)
1311            .map_err(Error::EnableSinglestep)
1312    }
1313
1314    fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
1315        vcpu.get_max_hw_bps().map_err(Error::GetMaxHwBreakPoint)
1316    }
1317
1318    fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
1319        const SINGLE_STEP: bool = false;
1320        vcpu.set_guest_debug(breakpoints, SINGLE_STEP)
1321            .map_err(Error::SetHwBreakpoint)
1322    }
1323}
1324
1325impl AArch64 {
    /// Returns the base kernel command line for this architecture.
1327    fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
1328        let mut cmdline = kernel_cmdline::Cmdline::new();
1329        cmdline.insert_str("panic=-1").unwrap();
1330        cmdline
1331    }
1332
1333    /// This adds any early platform devices for this architecture.
1334    ///
1335    /// # Arguments
1336    ///
1337    /// * `irq_chip` - The IRQ chip to add irqs to.
1338    /// * `bus` - The bus to add devices to.
1339    /// * `vcpu_count` - The number of virtual CPUs for this guest VM
1340    /// * `vm_evt_wrtube` - The notification channel
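    /// * `vmwdt_request_tube` - Tube used by the virtual watchdog device to send requests.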
1341    fn add_arch_devs(
1342        irq_chip: &mut dyn IrqChip,
1343        bus: &Bus,
1344        vcpu_count: usize,
1345        vm_evt_wrtube: &SendTube,
1346        vmwdt_request_tube: Tube,
1347    ) -> Result<()> {
1348        let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1349        let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
1350        irq_chip
1351            .register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
1352            .map_err(Error::RegisterIrqfd)?;
1353
1354        bus.insert(
1355            Arc::new(Mutex::new(rtc)),
1356            AARCH64_RTC_ADDR,
1357            AARCH64_RTC_SIZE,
1358        )
1359        .expect("failed to add rtc device");
1360
1361        let vmwdt_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1362        let vm_wdt = devices::vmwdt::Vmwdt::new(
1363            vcpu_count,
1364            vm_evt_wrtube.try_clone().unwrap(),
1365            vmwdt_evt.try_clone().map_err(Error::CloneEvent)?,
1366            vmwdt_request_tube,
1367        )
1368        .map_err(Error::CreateVmwdtDevice)?;
1369        irq_chip
1370            .register_edge_irq_event(
1371                AARCH64_VMWDT_IRQ,
1372                &vmwdt_evt,
1373                IrqEventSource::from_device(&vm_wdt),
1374            )
1375            .map_err(Error::RegisterIrqfd)?;
1376
1377        bus.insert(
1378            Arc::new(Mutex::new(vm_wdt)),
1379            AARCH64_VMWDT_ADDR,
1380            AARCH64_VMWDT_SIZE,
1381        )
1382        .expect("failed to add vmwdt device");
1383
1384        Ok(())
1385    }
1386
1387    /// Get ARM-specific features for vcpu with index `vcpu_id`.
1388    ///
1389    /// # Arguments
1390    ///
1391    /// * `vcpu_id` - The VM's index for `vcpu`.
1392    /// * `use_pmu` - Should `vcpu` be configured to use the Performance Monitor Unit.
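    /// * `boot_cpu` - The index of the boot vcpu.
    /// * `enable_sve` - Should `vcpu` be configured with SVE enabled.
    ///
    /// Example (follows directly from the logic below): a non-boot vcpu with
    /// `use_pmu` set and SVE disabled gets `[PsciV0_2, PmuV3, PowerOff]`.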
1393    fn vcpu_features(
1394        vcpu_id: usize,
1395        use_pmu: bool,
1396        boot_cpu: usize,
1397        enable_sve: bool,
1398    ) -> Vec<VcpuFeature> {
1399        let mut features = vec![VcpuFeature::PsciV0_2];
1400        if use_pmu {
1401            features.push(VcpuFeature::PmuV3);
1402        }
1403        // Non-boot cpus are powered off initially
1404        if vcpu_id != boot_cpu {
1405            features.push(VcpuFeature::PowerOff);
1406        }
1407        if enable_sve {
1408            features.push(VcpuFeature::Sve);
1409        }
1410
1411        features
1412    }
1413
1414    /// Get initial register state for vcpu with index `vcpu_id`.
1415    ///
1416    /// # Arguments
1417    ///
1418    /// * `vcpu_id` - The VM's index for `vcpu`.
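    ///
    /// Sketch of the result (see the unit tests at the bottom of this file for
    /// concrete cases): the boot vcpu gets `Pstate` (EL1h, all interrupts
    /// masked), `Pc` (unless the hypervisor supplies the entry point itself)
    /// and `X0` = FDT address, plus `X1`/`X2` (payload entry and size) when
    /// protected firmware runs in front of the payload; every other vcpu only
    /// gets `Pstate`.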
1419    fn vcpu_init(
1420        vcpu_id: usize,
1421        payload: &PayloadType,
1422        fdt_address: GuestAddress,
1423        protection_type: ProtectionType,
1424        boot_cpu: usize,
1425    ) -> VcpuInitAArch64 {
1426        let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();
1427
1428        // All interrupts masked
1429        let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
1430        regs.insert(VcpuRegAArch64::Pstate, pstate);
1431
1432        // Other cpus are powered off initially
1433        if vcpu_id == boot_cpu {
1434            let entry_addr = if protection_type.needs_firmware_loaded() {
1435                Some(AARCH64_PROTECTED_VM_FW_START)
1436            } else if protection_type.runs_firmware() {
1437                None // Initial PC value is set by the hypervisor
1438            } else {
1439                Some(payload.entry().offset())
1440            };
1441
1442            /* PC -- entry point */
1443            if let Some(entry) = entry_addr {
1444                regs.insert(VcpuRegAArch64::Pc, entry);
1445            }
1446
1447            /* X0 -- fdt address */
1448            regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());
1449
1450            if protection_type.runs_firmware() {
1451                /* X1 -- payload entry point */
1452                regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());
1453
1454                /* X2 -- image size */
1455                regs.insert(VcpuRegAArch64::X(2), payload.size());
1456            }
1457        }
1458
1459        VcpuInitAArch64 { regs }
1460    }
1461
1462    fn collect_for_each_cpu<F, T>(func: F) -> std::result::Result<Vec<T>, base::Error>
1463    where
1464        F: Fn(usize) -> std::result::Result<T, base::Error>,
1465    {
1466        (0..base::number_of_logical_cores()?).map(func).collect()
1467    }
1468}
1469
1470#[cfg(test)]
1471mod tests {
1472    use super::*;
1473
1474    #[test]
1475    fn vcpu_init_unprotected_kernel() {
1476        let payload = PayloadType::Kernel(LoadedKernel {
1477            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
1478            size: 0x1000,
1479            entry: GuestAddress(0x8080_0000),
1480            class: kernel_loader::ElfClass::ElfClass64,
1481        });
1482        assert_eq!(
1483            payload.address_range(),
1484            AddressRange {
1485                start: 0x8080_0000,
1486                end: 0x8080_0fff
1487            }
1488        );
1489        let fdt_address = GuestAddress(0x1234);
1490        let prot = ProtectionType::Unprotected;
1491
1492        let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1493
1494        // PC: kernel image entry point
1495        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8080_0000));
1496
1497        // X0: fdt_offset
1498        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1499    }
1500
1501    #[test]
1502    fn vcpu_init_unprotected_bios() {
1503        let payload = PayloadType::Bios {
1504            entry: GuestAddress(0x8020_0000),
1505            image_size: 0x1000,
1506        };
1507        assert_eq!(
1508            payload.address_range(),
1509            AddressRange {
1510                start: 0x8020_0000,
1511                end: 0x8020_0fff
1512            }
1513        );
1514        let fdt_address = GuestAddress(0x1234);
1515        let prot = ProtectionType::Unprotected;
1516
1517        let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1518
1519        // PC: bios image entry point
1520        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8020_0000));
1521
1522        // X0: fdt_offset
1523        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1524    }
1525
1526    #[test]
1527    fn vcpu_init_protected_kernel() {
1528        let payload = PayloadType::Kernel(LoadedKernel {
1529            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
1530            size: 0x1000,
1531            entry: GuestAddress(0x8080_0000),
1532            class: kernel_loader::ElfClass::ElfClass64,
1533        });
1534        assert_eq!(
1535            payload.address_range(),
1536            AddressRange {
1537                start: 0x8080_0000,
1538                end: 0x8080_0fff
1539            }
1540        );
1541        let fdt_address = GuestAddress(0x1234);
1542        let prot = ProtectionType::Protected;
1543
1544        let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1545
1546        // The hypervisor provides the initial value of PC, so PC should not be present in the
1547        // vcpu_init register map.
1548        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), None);
1549
1550        // X0: fdt_offset
1551        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1552
1553        // X1: kernel image entry point
1554        assert_eq!(
1555            vcpu_init.regs.get(&VcpuRegAArch64::X(1)),
1556            Some(&0x8080_0000)
1557        );
1558
1559        // X2: image size
1560        assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(2)), Some(&0x1000));
1561    }
1562}