// x86_64/lib.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! x86 architecture support.
6
7#![cfg(target_arch = "x86_64")]
8
9mod fdt;
10
11#[cfg(feature = "gdb")]
12mod gdb;
13
14const SETUP_DTB: u32 = 2;
15const SETUP_RNG_SEED: u32 = 9;
16
17#[allow(dead_code)]
18#[allow(non_upper_case_globals)]
19#[allow(non_camel_case_types)]
20#[allow(non_snake_case)]
21pub mod bootparam;
22
23#[allow(dead_code)]
24#[allow(non_upper_case_globals)]
25mod msr_index;
26
27#[allow(dead_code)]
28#[allow(non_upper_case_globals)]
29#[allow(non_camel_case_types)]
30#[allow(clippy::all)]
31mod mpspec;
32
33pub mod multiboot_spec;
34
35pub mod acpi;
36mod bzimage;
37pub mod cpuid;
38mod gdt;
39pub mod interrupts;
40pub mod mptable;
41pub mod regs;
42pub mod smbios;
43
44use std::arch::x86_64::CpuidResult;
45use std::cmp::min;
46use std::collections::BTreeMap;
47use std::fmt;
48use std::fs::File;
49use std::io;
50use std::io::Write;
51use std::mem;
52use std::path::PathBuf;
53use std::sync::mpsc;
54use std::sync::Arc;
55
56use acpi_tables::aml;
57use acpi_tables::aml::Aml;
58use acpi_tables::sdt::SDT;
59use anyhow::Context;
60use arch::get_serial_cmdline;
61use arch::serial::SerialDeviceInfo;
62use arch::CpuSet;
63use arch::DtbOverlay;
64use arch::FdtPosition;
65use arch::GetSerialCmdlineError;
66use arch::MemoryRegionConfig;
67use arch::PciConfig;
68use arch::RunnableLinuxVm;
69use arch::VmComponents;
70use arch::VmImage;
71use base::debug;
72use base::info;
73use base::warn;
74#[cfg(any(target_os = "android", target_os = "linux"))]
75use base::AsRawDescriptors;
76use base::Event;
77use base::FileGetLen;
78use base::FileReadWriteAtVolatile;
79use base::SendTube;
80use base::Tube;
81use base::TubeError;
82use chrono::Utc;
83pub use cpuid::adjust_cpuid;
84pub use cpuid::CpuIdContext;
85use devices::acpi::PM_WAKEUP_GPIO;
86use devices::Bus;
87use devices::BusDevice;
88use devices::BusDeviceObj;
89use devices::BusResumeDevice;
90use devices::BusType;
91use devices::Debugcon;
92use devices::FwCfgParameters;
93use devices::IrqChip;
94use devices::IrqChipX86_64;
95use devices::IrqEventSource;
96use devices::PciAddress;
97use devices::PciConfigIo;
98use devices::PciConfigMmio;
99use devices::PciDevice;
100use devices::PciInterruptPin;
101use devices::PciRoot;
102use devices::PciRootCommand;
103use devices::PciVirtualConfigMmio;
104use devices::Pflash;
105#[cfg(any(target_os = "android", target_os = "linux"))]
106use devices::ProxyDevice;
107use devices::Serial;
108use devices::SerialHardware;
109use devices::SerialParameters;
110use devices::VirtualPmc;
111use devices::FW_CFG_BASE_PORT;
112use devices::FW_CFG_MAX_FILE_SLOTS;
113use devices::FW_CFG_WIDTH;
114use hypervisor::CpuConfigX86_64;
115use hypervisor::Hypervisor;
116use hypervisor::HypervisorX86_64;
117use hypervisor::ProtectionType;
118use hypervisor::VcpuInitX86_64;
119use hypervisor::VcpuX86_64;
120use hypervisor::Vm;
121use hypervisor::VmCap;
122use hypervisor::VmX86_64;
123#[cfg(feature = "seccomp_trace")]
124use jail::read_jail_addr;
125#[cfg(windows)]
126use jail::FakeMinijailStub as Minijail;
127#[cfg(any(target_os = "android", target_os = "linux"))]
128use minijail::Minijail;
129use mptable::MPTABLE_RANGE;
130use multiboot_spec::MultibootInfo;
131use multiboot_spec::MultibootMmapEntry;
132use multiboot_spec::MULTIBOOT_BOOTLOADER_MAGIC;
133use remain::sorted;
134use resources::AddressRange;
135use resources::SystemAllocator;
136use resources::SystemAllocatorConfig;
137use sync::Condvar;
138use sync::Mutex;
139use thiserror::Error;
140use vm_control::BatControl;
141use vm_control::BatteryType;
142use vm_memory::GuestAddress;
143use vm_memory::GuestMemory;
144use vm_memory::GuestMemoryError;
145use vm_memory::MemoryRegionOptions;
146use vm_memory::MemoryRegionPurpose;
147use zerocopy::FromBytes;
148use zerocopy::Immutable;
149use zerocopy::IntoBytes;
150use zerocopy::KnownLayout;
151
152use crate::bootparam::boot_params;
153use crate::bootparam::setup_header;
154use crate::bootparam::XLF_CAN_BE_LOADED_ABOVE_4G;
155use crate::cpuid::EDX_HYBRID_CPU_SHIFT;
156
157#[sorted]
158#[derive(Error, Debug)]
159pub enum Error {
160    #[error("error allocating a single gpe")]
161    AllocateGpe,
162    #[error("error allocating IO resource: {0}")]
163    AllocateIOResouce(resources::Error),
164    #[error("error allocating a single irq")]
165    AllocateIrq,
166    #[error("unable to clone an Event: {0}")]
167    CloneEvent(base::Error),
168    #[error("failed to clone IRQ chip: {0}")]
169    CloneIrqChip(base::Error),
170    #[cfg(any(target_os = "android", target_os = "linux"))]
171    #[error("failed to clone jail: {0}")]
172    CloneJail(minijail::Error),
173    #[error("unable to clone a Tube: {0}")]
174    CloneTube(TubeError),
175    #[error("the given kernel command line was invalid: {0}")]
176    Cmdline(kernel_cmdline::Error),
177    #[error("failed writing command line to guest memory")]
178    CommandLineCopy,
179    #[error("command line overflowed guest memory")]
180    CommandLineOverflow,
181    #[error("failed to configure hotplugged pci device: {0}")]
182    ConfigurePciDevice(arch::DeviceRegistrationError),
183    #[error("bad PCI ECAM configuration: {0}")]
184    ConfigurePciEcam(String),
185    #[error("bad PCI mem configuration: {0}")]
186    ConfigurePciMem(String),
187    #[error("failed to configure segment registers: {0}")]
188    ConfigureSegments(regs::Error),
189    #[error("error configuring the system")]
190    ConfigureSystem,
191    #[error("unable to create ACPI tables")]
192    CreateAcpi,
193    #[error("unable to create battery devices: {0}")]
194    CreateBatDevices(arch::DeviceRegistrationError),
195    #[error("could not create debugcon device: {0}")]
196    CreateDebugconDevice(devices::SerialError),
197    #[error("unable to make an Event: {0}")]
198    CreateEvent(base::Error),
199    #[error("failed to create fdt: {0}")]
200    CreateFdt(cros_fdt::Error),
201    #[error("failed to create fw_cfg device: {0}")]
202    CreateFwCfgDevice(devices::FwCfgError),
203    #[error("failed to create IOAPIC device: {0}")]
204    CreateIoapicDevice(base::Error),
205    #[error("failed to create a PCI root hub: {0}")]
206    CreatePciRoot(arch::DeviceRegistrationError),
207    #[error("unable to create PIT: {0}")]
208    CreatePit(base::Error),
209    #[error("unable to make PIT device: {0}")]
210    CreatePitDevice(devices::PitError),
211    #[cfg(any(target_os = "android", target_os = "linux"))]
212    #[error("unable to create proxy device: {0}")]
213    CreateProxyDevice(devices::ProxyError),
214    #[error("unable to create serial devices: {0}")]
215    CreateSerialDevices(arch::DeviceRegistrationError),
216    #[error("failed to create socket: {0}")]
217    CreateSocket(io::Error),
218    #[error("failed to create tube: {0}")]
219    CreateTube(base::TubeError),
220    #[error("failed to create VCPU: {0}")]
221    CreateVcpu(base::Error),
222    #[error("DTB size is larger than the allowed size")]
223    DTBSizeGreaterThanAllowed,
224    #[error("invalid e820 setup params")]
225    E820Configuration,
226    #[error("failed to enable singlestep execution: {0}")]
227    EnableSinglestep(base::Error),
228    #[error("failed to enable split irqchip: {0}")]
229    EnableSplitIrqchip(base::Error),
230    #[error("failed to get serial cmdline: {0}")]
231    GetSerialCmdline(GetSerialCmdlineError),
232    #[error("failed to insert device onto bus: {0}")]
233    InsertBus(devices::BusError),
234    #[error("the kernel extends past the end of RAM")]
235    InvalidCpuConfig,
236    #[error("invalid CPU config parameters")]
237    KernelOffsetPastEnd,
238    #[error("error loading bios: {0}")]
239    LoadBios(io::Error),
240    #[error("error loading kernel bzImage: {0}")]
241    LoadBzImage(bzimage::Error),
242    #[error("error loading custom pVM firmware: {0}")]
243    LoadCustomPvmFw(arch::LoadImageError),
244    #[error("error loading initrd: {0}")]
245    LoadInitrd(arch::LoadImageError),
246    #[error("error loading Kernel: {0}")]
247    LoadKernel(kernel_loader::Error),
248    #[error("error loading pflash: {0}")]
249    LoadPflash(io::Error),
250    #[error("error loading pVM firmware: {0}")]
251    LoadPvmFw(base::Error),
252    #[error("error in multiboot_info setup")]
253    MultibootInfoSetup,
254    #[error("error translating address: Page not present")]
255    PageNotPresent,
256    #[error("pci mmio overlaps with pVM firmware memory")]
257    PciMmioOverlapPvmFw,
258    #[error("pVM firmware not supported when bios is used on x86_64")]
259    PvmFwBiosUnsupported,
260    #[error("error reading guest memory {0}")]
261    ReadingGuestMemory(vm_memory::GuestMemoryError),
262    #[error("single register read not supported on x86_64")]
263    ReadRegIsUnsupported,
264    #[error("error reading CPU registers {0}")]
265    ReadRegs(base::Error),
266    #[error("error registering an IrqFd: {0}")]
267    RegisterIrqfd(base::Error),
268    #[error("error registering virtual socket device: {0}")]
269    RegisterVsock(arch::DeviceRegistrationError),
270    #[error("error reserved pcie config mmio")]
271    ReservePcieCfgMmio(resources::Error),
272    #[error("failed to set a hardware breakpoint: {0}")]
273    SetHwBreakpoint(base::Error),
274    #[error("failed to set identity map addr: {0}")]
275    SetIdentityMapAddr(base::Error),
276    #[error("failed to set interrupts: {0}")]
277    SetLint(interrupts::Error),
278    #[error("failed to set tss addr: {0}")]
279    SetTssAddr(base::Error),
280    #[error("failed to set up cmos: {0}")]
281    SetupCmos(anyhow::Error),
282    #[error("failed to set up cpuid: {0}")]
283    SetupCpuid(cpuid::Error),
284    #[error("setup data too large")]
285    SetupDataTooLarge,
286    #[error("failed to set up FPU: {0}")]
287    SetupFpu(base::Error),
288    #[error("failed to set up guest memory: {0}")]
289    SetupGuestMemory(GuestMemoryError),
290    #[error("failed to set up mptable: {0}")]
291    SetupMptable(mptable::Error),
292    #[error("failed to set up MSRs: {0}")]
293    SetupMsrs(base::Error),
294    #[error("failed to set up page tables: {0}")]
295    SetupPageTables(regs::Error),
296    #[error("failed to set up pflash: {0}")]
297    SetupPflash(anyhow::Error),
298    #[error("failed to set up registers: {0}")]
299    SetupRegs(regs::Error),
300    #[error("failed to set up SMBIOS: {0}")]
301    SetupSmbios(smbios::Error),
302    #[error("failed to set up sregs: {0}")]
303    SetupSregs(base::Error),
304    #[error("too many vCPUs")]
305    TooManyVcpus,
306    #[error("failed to translate virtual address")]
307    TranslatingVirtAddr,
308    #[error("protected VMs not supported on x86_64")]
309    UnsupportedProtectionType,
310    #[error("single register write not supported on x86_64")]
311    WriteRegIsUnsupported,
312    #[error("error writing CPU registers {0}")]
313    WriteRegs(base::Error),
314    #[error("error writing guest memory {0}")]
315    WritingGuestMemory(GuestMemoryError),
316    #[error("error writing setup_data: {0}")]
317    WritingSetupData(GuestMemoryError),
318    #[error("the zero page extends past the end of guest_mem")]
319    ZeroPagePastRamEnd,
320    #[error("error writing the zero page of guest memory")]
321    ZeroPageSetup,
322}
323
324pub type Result<T> = std::result::Result<T, Error>;
325
326pub struct X8664arch;
327
// Like `bootparam::setup_data` without the incomplete array field at the end, which allows us to
// safely implement Copy, Clone
#[repr(C)]
#[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
struct setup_data_hdr {
    // Guest-physical address of the next setup_data entry, or 0 to end the list.
    pub next: u64,
    // Entry type tag (one of the SETUP_* constants, e.g. SETUP_DTB, SETUP_RNG_SEED).
    pub type_: u32,
    // Length in bytes of the payload that immediately follows this header.
    pub len: u32,
}
337
/// Types of `setup_data` entries this crate can produce, with discriminants
/// matching the Linux boot protocol SETUP_* constants.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SetupDataType {
    /// Device tree blob (SETUP_DTB).
    Dtb = SETUP_DTB,
    /// Random seed data (SETUP_RNG_SEED).
    RngSeed = SETUP_RNG_SEED,
}
344
/// A single entry to be inserted in the bootparam `setup_data` linked list.
pub struct SetupData {
    /// Raw payload bytes written to guest memory after the entry header.
    pub data: Vec<u8>,
    /// Type tag recorded in the entry header.
    pub type_: SetupDataType,
}

impl SetupData {
    /// Returns the length of the data payload in bytes (header not included).
    pub fn size(&self) -> usize {
        self.data.len()
    }
}
357
/// Collection of SetupData entries to be inserted in the
/// bootparam `setup_data` linked list.
pub struct SetupDataEntries {
    // Entries in insertion order; linked together when written to guest memory.
    entries: Vec<SetupData>,
    // Guest-physical start of the region reserved for setup_data.
    setup_data_start: usize,
    // Guest-physical end of the reserved region.
    setup_data_end: usize,
    // Estimate of bytes still available; counts payload only (headers and
    // alignment padding are accounted for later in write_setup_data).
    available_size: usize,
}
366
367impl SetupDataEntries {
368    /// Returns a new instance of SetupDataEntries
369    pub fn new(setup_data_start: usize, setup_data_end: usize) -> SetupDataEntries {
370        SetupDataEntries {
371            entries: Vec::new(),
372            setup_data_start,
373            setup_data_end,
374            available_size: setup_data_end - setup_data_start,
375        }
376    }
377
378    /// Adds a new SetupDataEntry and returns the remaining size available
379    pub fn insert(&mut self, setup_data: SetupData) -> usize {
380        self.available_size -= setup_data.size();
381        self.entries.push(setup_data);
382
383        self.available_size
384    }
385
386    /// Copy setup_data entries to guest memory and link them together with the `next` field.
387    /// Returns the guest address of the first entry in the setup_data list, if any.
388    pub fn write_setup_data(&self, guest_mem: &GuestMemory) -> Result<Option<GuestAddress>> {
389        write_setup_data(
390            guest_mem,
391            GuestAddress(self.setup_data_start as u64),
392            GuestAddress(self.setup_data_end as u64),
393            &self.entries,
394        )
395    }
396}
397
/// Memory region type codes used in the e820 map (INT 15h, AX=E820h format).
#[derive(Copy, Clone, Debug)]
enum E820Type {
    /// Usable RAM.
    Ram = 0x01,
    /// Reserved region, not usable by the guest OS.
    Reserved = 0x2,
}
403
/// One region in the e820 memory map handed to the guest.
#[derive(Copy, Clone, Debug)]
struct E820Entry {
    // Guest-physical start address of the region.
    pub address: GuestAddress,
    // Region length in bytes.
    pub len: u64,
    // Region classification (RAM or reserved).
    pub mem_type: E820Type,
}
410
const KB: u64 = 1 << 10;
const MB: u64 = 1 << 20;
const GB: u64 = 1 << 30;

// Initial stack pointer for the boot CPU.
pub const BOOT_STACK_POINTER: u64 = 0x8000;
// First address beyond the 32-bit address space (4 GiB).
const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32;
// Make sure it align to 256MB for MTRR convenient
const MEM_32BIT_GAP_SIZE: u64 = 768 * MB;
// Reserved memory for nand_bios/LAPIC/IOAPIC/HPET/.....
const RESERVED_MEM_SIZE: u64 = 0x800_0000;
// Last usable byte (inclusive) of the default PCI memory hole below 4 GiB.
const DEFAULT_PCI_MEM_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
// Reserve 64MB for pcie enhanced configuration
const DEFAULT_PCIE_CFG_MMIO_SIZE: u64 = 0x400_0000;
const DEFAULT_PCIE_CFG_MMIO_END: u64 = FIRST_ADDR_PAST_32BITS - RESERVED_MEM_SIZE - 1;
const DEFAULT_PCIE_CFG_MMIO_START: u64 = DEFAULT_PCIE_CFG_MMIO_END - DEFAULT_PCIE_CFG_MMIO_SIZE + 1;
// Linux (with 4-level paging) has a physical memory limit of 46 bits (64 TiB).
const HIGH_MMIO_MAX_END: u64 = (1u64 << 46) - 1;
// Offsets of the 32-bit and 64-bit entry points within the loaded kernel image.
pub const KERNEL_32BIT_ENTRY_OFFSET: u64 = 0x0;
pub const KERNEL_64BIT_ENTRY_OFFSET: u64 = 0x200;
// Guest-physical placement and size of the Multiboot info structure and the
// auxiliary data (mmap, bootloader name) that follows it.
pub const MULTIBOOT_INFO_OFFSET: u64 = 0x6000;
pub const MULTIBOOT_INFO_SIZE: u64 = 0x1000;
// Guest-physical address where the Linux boot_params ("zero page") is written.
pub const ZERO_PAGE_OFFSET: u64 = 0x7000;
// Set BIOS max size to 16M: this is used only when `unrestricted guest` is disabled
const BIOS_MAX_SIZE: u64 = 0x1000000;

// Guest-physical load address of the kernel image.
pub const KERNEL_START_OFFSET: u64 = 0x20_0000;
// Guest-physical placement of the kernel command line.
const CMDLINE_OFFSET: u64 = 0x2_0000;
const CMDLINE_MAX_SIZE: u64 = 0x800; // including terminating zero
// The setup_data region sits between the end of the command line and the MP table.
const SETUP_DATA_START: u64 = CMDLINE_OFFSET + CMDLINE_MAX_SIZE;
const SETUP_DATA_END: u64 = MPTABLE_RANGE.start;
// Maximum size allowed for the flattened device tree blob.
const X86_64_FDT_MAX_SIZE: u64 = 0x4000;
// Legacy ISA IRQ lines shared by COM1/COM3 and COM2/COM4 respectively.
const X86_64_SERIAL_1_3_IRQ: u32 = 4;
const X86_64_SERIAL_2_4_IRQ: u32 = 3;
// X86_64_SCI_IRQ is used to fill the ACPI FACP table.
// The sci_irq number is better to be a legacy
// IRQ number which is less than 16(actually most of the
// platforms have fixed IRQ number 9). So we can
// reserve the IRQ number 5 for SCI and let the
// the other devices starts from next.
pub const X86_64_SCI_IRQ: u32 = 5;
// The CMOS RTC uses IRQ 8; start allocating IRQs at 9.
pub const X86_64_IRQ_BASE: u32 = 9;
// Base of the high BIOS window where the ACPI RSDP is placed (0xE0000 area).
const ACPI_HI_RSDP_WINDOW_BASE: u64 = 0x000E_0000;

// pVM firmware memory. Should be within the low 4GB, so that it is identity-mapped
// by setup_page_tables() when a protected VM boots in long mode, since the pVM firmware is
// the VM entry point.
const PROTECTED_VM_FW_MAX_SIZE: u64 = 0x40_0000;
// Load the pVM firmware just below 2 GB to allow use of `-mcmodel=small`.
const PROTECTED_VM_FW_START: u64 = 0x8000_0000 - PROTECTED_VM_FW_MAX_SIZE;
461
/// CPU vendor, as determined from CPUID (see `cpuid::cpu_manufacturer`).
#[derive(Debug, PartialEq, Eq)]
pub enum CpuManufacturer {
    Intel,
    Amd,
    Unknown,
}
468
/// Returns the manufacturer of the host CPU, as reported by the cpuid module.
pub fn get_cpu_manufacturer() -> CpuManufacturer {
    cpuid::cpu_manufacturer()
}
472
/// x86_64 guest-physical address layout decisions derived from the PCI
/// configuration and whether a protected VM firmware is in use.
pub struct ArchMemoryLayout {
    // the pci mmio range below 4G
    pci_mmio_before_32bit: AddressRange,
    // the pcie cfg mmio range
    pcie_cfg_mmio: AddressRange,
    // the pVM firmware memory (if running a protected VM)
    pvmfw_mem: Option<AddressRange>,
}
481
482pub fn create_arch_memory_layout(
483    pci_config: &PciConfig,
484    has_protected_vm_firmware: bool,
485) -> Result<ArchMemoryLayout> {
486    // the max bus number is 256 and each bus occupy 1MB, so the max pcie cfg mmio size = 256M
487    const MAX_PCIE_ECAM_SIZE: u64 = 256 * MB;
488    let pcie_cfg_mmio = match pci_config.ecam {
489        Some(MemoryRegionConfig {
490            start,
491            size: Some(size),
492        }) => AddressRange::from_start_and_size(start, size.min(MAX_PCIE_ECAM_SIZE)).unwrap(),
493        Some(MemoryRegionConfig { start, size: None }) => {
494            AddressRange::from_start_and_end(start, DEFAULT_PCIE_CFG_MMIO_END)
495        }
496        None => {
497            AddressRange::from_start_and_end(DEFAULT_PCIE_CFG_MMIO_START, DEFAULT_PCIE_CFG_MMIO_END)
498        }
499    };
500    if pcie_cfg_mmio.start % pcie_cfg_mmio.len().unwrap() != 0
501        || pcie_cfg_mmio.start % MB != 0
502        || pcie_cfg_mmio.len().unwrap() % MB != 0
503    {
504        return Err(Error::ConfigurePciEcam(
505            "base and len must be aligned to 1MB and base must be a multiple of len".to_string(),
506        ));
507    }
508    if pcie_cfg_mmio.end >= 0x1_0000_0000 {
509        return Err(Error::ConfigurePciEcam(
510            "end address can't go beyond 4G".to_string(),
511        ));
512    }
513
514    let pci_mmio_before_32bit = match pci_config.mem {
515        Some(MemoryRegionConfig {
516            start,
517            size: Some(size),
518        }) => AddressRange::from_start_and_size(start, size)
519            .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
520        Some(MemoryRegionConfig { start, size: None }) => {
521            AddressRange::from_start_and_end(start, DEFAULT_PCI_MEM_END)
522        }
523        None => AddressRange::from_start_and_end(
524            pcie_cfg_mmio
525                .start
526                .min(FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE),
527            DEFAULT_PCI_MEM_END,
528        ),
529    };
530
531    let pvmfw_mem = if has_protected_vm_firmware {
532        let range = AddressRange {
533            start: PROTECTED_VM_FW_START,
534            end: PROTECTED_VM_FW_START + PROTECTED_VM_FW_MAX_SIZE - 1,
535        };
536        if !pci_mmio_before_32bit.intersect(range).is_empty() {
537            return Err(Error::PciMmioOverlapPvmFw);
538        }
539
540        Some(range)
541    } else {
542        None
543    };
544
545    Ok(ArchMemoryLayout {
546        pci_mmio_before_32bit,
547        pcie_cfg_mmio,
548        pvmfw_mem,
549    })
550}
551
552/// The x86 reset vector for i386+ and x86_64 puts the processor into an "unreal mode" where it
553/// can access the last 1 MB of the 32-bit address space in 16-bit mode, and starts the instruction
554/// pointer at the effective physical address 0xFFFF_FFF0.
555fn bios_start(bios_size: u64) -> GuestAddress {
556    GuestAddress(FIRST_ADDR_PAST_32BITS - bios_size)
557}
558
559fn identity_map_addr_start() -> GuestAddress {
560    // Set Identity map address 4 pages before the max BIOS size
561    GuestAddress(FIRST_ADDR_PAST_32BITS - BIOS_MAX_SIZE - 4 * 0x1000)
562}
563
564fn tss_addr_start() -> GuestAddress {
565    // Set TSS address one page after identity map address
566    GuestAddress(identity_map_addr_start().offset() + 0x1000)
567}
568
569fn tss_addr_end() -> GuestAddress {
570    // Set TSS address section to have 3 pages
571    GuestAddress(tss_addr_start().offset() + 0x3000)
572}
573
/// Fills in and writes the Linux boot protocol "zero page" (`boot_params`) to
/// guest memory at `ZERO_PAGE_OFFSET`.
///
/// Sets the loader/boot-flag magic values, command line and optional initrd
/// pointers, the optional setup_data list head, and copies the e820 map into
/// the params' e820 table.
///
/// Returns `E820Configuration` if the e820 entries don't fit in the table,
/// `ZeroPagePastRamEnd` if the zero page doesn't fit in guest memory, or
/// `ZeroPageSetup` if the write fails.
fn configure_boot_params(
    guest_mem: &GuestMemory,
    cmdline_addr: GuestAddress,
    setup_data: Option<GuestAddress>,
    initrd: Option<(GuestAddress, u32)>,
    mut params: boot_params,
    e820_entries: &[E820Entry],
) -> Result<()> {
    const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
    const KERNEL_HDR_MAGIC: u32 = 0x5372_6448;
    const KERNEL_LOADER_OTHER: u8 = 0xff;
    const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x100_0000; // Must be non-zero.

    params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
    params.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC;
    params.hdr.header = KERNEL_HDR_MAGIC;
    // 64-bit addresses are split across the legacy 32-bit field and the
    // "ext" field holding the high 32 bits.
    params.hdr.cmd_line_ptr = cmdline_addr.offset() as u32;
    params.ext_cmd_line_ptr = (cmdline_addr.offset() >> 32) as u32;
    params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES;
    if let Some(setup_data) = setup_data {
        params.hdr.setup_data = setup_data.offset();
    }
    if let Some((initrd_addr, initrd_size)) = initrd {
        // Same low/high split for the initrd address; size is u32 so the
        // extended size field is always zero here.
        params.hdr.ramdisk_image = initrd_addr.offset() as u32;
        params.ext_ramdisk_image = (initrd_addr.offset() >> 32) as u32;
        params.hdr.ramdisk_size = initrd_size;
        params.ext_ramdisk_size = 0;
    }

    if e820_entries.len() >= params.e820_table.len() {
        return Err(Error::E820Configuration);
    }

    for (src, dst) in e820_entries.iter().zip(params.e820_table.iter_mut()) {
        dst.addr = src.address.offset();
        dst.size = src.len;
        dst.type_ = src.mem_type as u32;
    }
    params.e820_entries = e820_entries.len() as u8;

    let zero_page_addr = GuestAddress(ZERO_PAGE_OFFSET);
    // Validate the destination range before writing the whole struct.
    if !guest_mem.is_valid_range(zero_page_addr, mem::size_of::<boot_params>() as u64) {
        return Err(Error::ZeroPagePastRamEnd);
    }

    guest_mem
        .write_obj_at_addr(params, zero_page_addr)
        .map_err(|_| Error::ZeroPageSetup)?;

    Ok(())
}
625
/// Builds a Multiboot information structure and writes it to guest memory at
/// `MULTIBOOT_INFO_OFFSET`.
///
/// Populates mem_lower/mem_upper from the guest memory regions, converts the
/// e820 entries into a Multiboot memory map, and records the command line
/// address and a "crosvm" boot loader name; auxiliary data is appended
/// directly after the info structure via `append_multiboot_info`.
fn configure_multiboot_info(
    guest_mem: &GuestMemory,
    cmdline_addr: GuestAddress,
    e820_entries: &[E820Entry],
) -> Result<()> {
    let mut multiboot_info = MultibootInfo {
        ..Default::default()
    };

    // Extra Multiboot-related data is added directly after the info structure.
    let mut multiboot_data_addr =
        GuestAddress(MULTIBOOT_INFO_OFFSET + mem::size_of_val(&multiboot_info) as u64);
    multiboot_data_addr = multiboot_data_addr
        .align(16)
        .ok_or(Error::MultibootInfoSetup)?;

    // mem_lower is the amount of RAM below 1 MB, in units of KiB.
    let mem_lower = guest_mem
        .regions()
        .filter(|r| {
            r.options.purpose == MemoryRegionPurpose::GuestMemoryRegion
                && r.guest_addr.offset() < 1 * MB
        })
        .map(|r| r.size as u64)
        .sum::<u64>()
        / KB;

    // mem_upper is the amount of RAM above 1 MB up to the first memory hole, in units of KiB.
    // We don't have the ISA 15-16 MB hole, so this includes all RAM from 1 MB up to the
    // beginning of the PCI hole just below 4 GB.
    let mem_upper = guest_mem
        .regions()
        .filter(|r| {
            r.options.purpose == MemoryRegionPurpose::GuestMemoryRegion
                && r.guest_addr.offset() >= 1 * MB
                && r.guest_addr.offset() < 4 * GB
        })
        .map(|r| r.size as u64)
        .sum::<u64>()
        / KB;

    multiboot_info.mem_lower = mem_lower as u32;
    multiboot_info.mem_upper = mem_upper as u32;
    multiboot_info.flags |= MultibootInfo::F_MEM;

    // Memory map - convert from params.e820_table to Multiboot format.
    let multiboot_mmap: Vec<MultibootMmapEntry> = e820_entries
        .iter()
        .map(|e820_entry| MultibootMmapEntry {
            size: 20, // size of the entry, not including the size field itself
            base_addr: e820_entry.address.offset(),
            length: e820_entry.len,
            type_: e820_entry.mem_type as u32,
        })
        .collect();
    let multiboot_mmap_bytes = multiboot_mmap.as_bytes();
    let multiboot_mmap_addr =
        append_multiboot_info(guest_mem, &mut multiboot_data_addr, multiboot_mmap_bytes)?;
    multiboot_info.mmap_addr = multiboot_mmap_addr.offset() as u32;
    multiboot_info.mmap_length = multiboot_mmap_bytes.len() as u32;
    multiboot_info.flags |= MultibootInfo::F_MMAP;

    // Command line
    multiboot_info.cmdline = cmdline_addr.offset() as u32;
    multiboot_info.flags |= MultibootInfo::F_CMDLINE;

    // Boot loader name
    let boot_loader_name_addr =
        append_multiboot_info(guest_mem, &mut multiboot_data_addr, b"crosvm\0")?;
    multiboot_info.boot_loader_name = boot_loader_name_addr.offset() as u32;
    multiboot_info.flags |= MultibootInfo::F_BOOT_LOADER_NAME;

    guest_mem
        .write_obj_at_addr(multiboot_info, GuestAddress(MULTIBOOT_INFO_OFFSET))
        .map_err(|_| Error::MultibootInfoSetup)?;

    Ok(())
}
704
/// Writes `data` at `*addr` in guest memory and advances `*addr` past the
/// written data, 16-byte aligned, for the next append.
///
/// Returns the address where `data` was written. Fails with
/// `MultibootInfoSetup` if the write would exceed the `MULTIBOOT_INFO_SIZE`
/// region or the address arithmetic overflows.
///
/// NOTE(review): the bounds check subtracts MULTIBOOT_INFO_OFFSET from the
/// new address, so callers must start `addr` at or above that offset —
/// confirm against configure_multiboot_info, the only visible caller.
fn append_multiboot_info(
    guest_mem: &GuestMemory,
    addr: &mut GuestAddress,
    data: &[u8],
) -> Result<GuestAddress> {
    let data_addr = *addr;
    // Compute the (aligned) cursor position for the next append up front.
    let new_addr = addr
        .checked_add(data.len() as u64)
        .and_then(|a| a.align(16))
        .ok_or(Error::MultibootInfoSetup)?;

    // Make sure we don't write beyond the region reserved for Multiboot info.
    if new_addr.offset() - MULTIBOOT_INFO_OFFSET > MULTIBOOT_INFO_SIZE {
        return Err(Error::MultibootInfoSetup);
    }

    guest_mem
        .write_all_at_addr(data, data_addr)
        .map_err(|_| Error::MultibootInfoSetup)?;

    *addr = new_addr;
    Ok(data_addr)
}
728
/// Write setup_data entries in guest memory and link them together with the `next` field.
///
/// Each entry is written as a `setup_data_hdr` followed by its payload, at a
/// 64-bit aligned address; the final entry's `next` field is 0 to terminate
/// the list. Fails with `SetupDataTooLarge` if an entry would not fit between
/// `setup_data_start` and `setup_data_end`.
///
/// Returns the guest address of the first entry in the setup_data list, if any.
fn write_setup_data(
    guest_mem: &GuestMemory,
    setup_data_start: GuestAddress,
    setup_data_end: GuestAddress,
    setup_data: &[SetupData],
) -> Result<Option<GuestAddress>> {
    let mut setup_data_list_head = None;

    // Place the first setup_data at the first 64-bit aligned offset following setup_data_start.
    let mut setup_data_addr = setup_data_start.align(8).ok_or(Error::SetupDataTooLarge)?;

    // Peekable so we can distinguish the last entry (its `next` must be 0).
    let mut entry_iter = setup_data.iter().peekable();
    while let Some(entry) = entry_iter.next() {
        if setup_data_list_head.is_none() {
            setup_data_list_head = Some(setup_data_addr);
        }

        // Ensure the entry (header plus data) fits into guest memory.
        let entry_size = (mem::size_of::<setup_data_hdr>() + entry.data.len()) as u64;
        let entry_end = setup_data_addr
            .checked_add(entry_size)
            .ok_or(Error::SetupDataTooLarge)?;

        if entry_end >= setup_data_end {
            return Err(Error::SetupDataTooLarge);
        }

        let next_setup_data_addr = if entry_iter.peek().is_some() {
            // Place the next setup_data at a 64-bit aligned address.
            setup_data_addr
                .checked_add(entry_size)
                .and_then(|addr| addr.align(8))
                .ok_or(Error::SetupDataTooLarge)?
        } else {
            // This is the final entry. Terminate the list with next == 0.
            GuestAddress(0)
        };

        let hdr = setup_data_hdr {
            next: next_setup_data_addr.offset(),
            type_: entry.type_ as u32,
            len: entry
                .data
                .len()
                .try_into()
                .map_err(|_| Error::SetupDataTooLarge)?,
        };

        // Write the header, then the payload immediately after it.
        guest_mem
            .write_obj_at_addr(hdr, setup_data_addr)
            .map_err(Error::WritingSetupData)?;
        guest_mem
            .write_all_at_addr(
                &entry.data,
                setup_data_addr.unchecked_add(mem::size_of::<setup_data_hdr>() as u64),
            )
            .map_err(Error::WritingSetupData)?;

        setup_data_addr = next_setup_data_addr;
    }

    Ok(setup_data_list_head)
}
795
/// Find the first `setup_data_hdr` with the given type in guest memory and return its address.
///
/// Walks the `next` chain starting at the first 8-byte aligned address at or
/// after `setup_data_start`; returns `None` if the type is not found, the
/// list terminates, or a header can't be read.
///
/// NOTE(review): the loop bound only checks each header's start address
/// against `setup_data_end`; a header starting just below the end could read
/// past it, and a malformed `next` chain pointing backward could loop — assumes
/// the list was produced by `write_setup_data`.
fn find_setup_data(
    mem: &GuestMemory,
    setup_data_start: GuestAddress,
    setup_data_end: GuestAddress,
    type_: SetupDataType,
) -> Option<GuestAddress> {
    let mut setup_data_addr = setup_data_start.align(8)?;
    while setup_data_addr < setup_data_end {
        let hdr: setup_data_hdr = mem.read_obj_from_addr(setup_data_addr).ok()?;
        if hdr.type_ == type_ as u32 {
            return Some(setup_data_addr);
        }

        // next == 0 terminates the list.
        if hdr.next == 0 {
            return None;
        }

        setup_data_addr = GuestAddress(hdr.next);
    }
    None
}
818
819/// Generate a SETUP_RNG_SEED SetupData with random seed data.
820fn setup_data_rng_seed() -> SetupData {
821    let data: [u8; 256] = rand::random();
822    SetupData {
823        data: data.to_vec(),
824        type_: SetupDataType::RngSeed,
825    }
826}
827
828/// Add an e820 region to the e820 map.
829fn add_e820_entry(
830    e820_entries: &mut Vec<E820Entry>,
831    range: AddressRange,
832    mem_type: E820Type,
833) -> Result<()> {
834    e820_entries.push(E820Entry {
835        address: GuestAddress(range.start),
836        len: range.len().ok_or(Error::E820Configuration)?,
837        mem_type,
838    });
839
840    Ok(())
841}
842
/// Generate a memory map in INT 0x15 AX=0xE820 format.
///
/// Emits one entry per guest memory region (typed from the region's purpose),
/// plus reserved entries for the PCIe config MMIO window, the PCIe virtual
/// config MMIO window, and the identity-map/TSS pages.
fn generate_e820_memory_map(
    arch_memory_layout: &ArchMemoryLayout,
    guest_mem: &GuestMemory,
) -> Result<Vec<E820Entry>> {
    let mut e820_entries = Vec::new();

    for r in guest_mem.regions() {
        let range = AddressRange::from_start_and_size(r.guest_addr.offset(), r.size as u64)
            .expect("invalid guest mem region");
        let mem_type = match r.options.purpose {
            MemoryRegionPurpose::Bios => E820Type::Reserved,
            MemoryRegionPurpose::GuestMemoryRegion => E820Type::Ram,
            // After the pVM firmware jumped to the guest, the pVM firmware itself is no longer
            // running, so its memory is reusable by the guest OS. So add this memory as RAM rather
            // than Reserved.
            MemoryRegionPurpose::ProtectedFirmwareRegion => E820Type::Ram,
            MemoryRegionPurpose::ReservedMemory => E820Type::Reserved,
        };
        add_e820_entry(&mut e820_entries, range, mem_type)?;
    }

    // The PCIe enhanced configuration (ECAM) window must not be used as RAM.
    let pcie_cfg_mmio_range = arch_memory_layout.pcie_cfg_mmio;
    add_e820_entry(&mut e820_entries, pcie_cfg_mmio_range, E820Type::Reserved)?;

    add_e820_entry(
        &mut e820_entries,
        X8664arch::get_pcie_vcfg_mmio_range(guest_mem, &pcie_cfg_mmio_range),
        E820Type::Reserved,
    )?;

    // Reserve memory section for Identity map and TSS
    add_e820_entry(
        &mut e820_entries,
        AddressRange {
            start: identity_map_addr_start().offset(),
            end: tss_addr_end().offset() - 1,
        },
        E820Type::Reserved,
    )?;

    Ok(e820_entries)
}
886
/// Returns a Vec of the valid memory addresses.
/// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64 all addresses are valid from the start of the kernel except a
/// carve out at the end of 32bit address space.
///
/// Each returned tuple is (start address, size in bytes, region options).
///
/// # Arguments
///
/// * `arch_memory_layout` - Precomputed layout providing the 32-bit PCI MMIO hole and the
///   optional pVM firmware range.
/// * `mem_size` - Total guest RAM size in bytes.
/// * `bios_size` - Size of the BIOS image in bytes, if the VM boots a BIOS image.
pub fn arch_memory_regions(
    arch_memory_layout: &ArchMemoryLayout,
    mem_size: u64,
    bios_size: Option<u64>,
) -> Vec<(GuestAddress, u64, MemoryRegionOptions)> {
    let mut regions = Vec::new();

    // Some guest kernels expect a typical PC memory layout where the region between 640 KB and
    // 1 MB is reserved for device memory/ROMs and get confused if there is a RAM region
    // spanning this area, so we provide the traditional 640 KB low memory and 1 MB+
    // high memory regions.
    let mem_below_1m = 640 * KB;
    regions.push((
        GuestAddress(0),
        mem_below_1m,
        MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
    ));

    // Reserved/BIOS data area between 640 KB and 1 MB.
    // This needs to be backed by an actual GuestMemory region so we can write BIOS tables here, but
    // it should be reported as "reserved" in the e820 memory map to match PC architecture
    // expectations.
    regions.push((
        GuestAddress(640 * KB),
        (1 * MB) - (640 * KB),
        MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ReservedMemory),
    ));

    // RAM between 1 MB and 4 GB
    // Capped at the start of the 32-bit PCI MMIO hole so RAM never overlaps it.
    let mem_1m_to_4g = arch_memory_layout.pci_mmio_before_32bit.start.min(mem_size) - 1 * MB;
    regions.push((
        GuestAddress(1 * MB),
        mem_1m_to_4g,
        MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
    ));

    // RAM above 4 GB
    // Whatever RAM did not fit below the MMIO hole resumes at the 4 GB boundary.
    let mem_above_4g = mem_size.saturating_sub(1 * MB + mem_1m_to_4g);
    if mem_above_4g > 0 {
        regions.push((
            GuestAddress(FIRST_ADDR_PAST_32BITS),
            mem_above_4g,
            MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
        ));
    }

    if let Some(bios_size) = bios_size {
        regions.push((
            bios_start(bios_size),
            bios_size,
            MemoryRegionOptions::new().purpose(MemoryRegionPurpose::Bios),
        ));
    }

    if let Some(pvmfw_mem) = arch_memory_layout.pvmfw_mem {
        // Remove any areas of guest memory regions that overlap the pVM firmware range.
        // Loop until no region overlaps; each iteration removes one overlapping region and
        // re-inserts its non-overlapping leftovers (either side may be empty).
        while let Some(overlapping_region_index) = regions.iter().position(|(addr, size, _opts)| {
            let region_addr_range = AddressRange::from_start_and_size(addr.offset(), *size)
                .expect("invalid GuestMemory range");
            region_addr_range.overlaps(pvmfw_mem)
        }) {
            let overlapping_region = regions.swap_remove(overlapping_region_index);
            let overlapping_region_range = AddressRange::from_start_and_size(
                overlapping_region.0.offset(),
                overlapping_region.1,
            )
            .unwrap();
            let (first, second) = overlapping_region_range.non_overlapping_ranges(pvmfw_mem);
            if !first.is_empty() {
                regions.push((
                    GuestAddress(first.start),
                    first.len().unwrap(),
                    overlapping_region.2.clone(),
                ));
            }
            if !second.is_empty() {
                regions.push((
                    GuestAddress(second.start),
                    second.len().unwrap(),
                    overlapping_region.2,
                ));
            }
        }

        // Insert a region for the pVM firmware area.
        regions.push((
            GuestAddress(pvmfw_mem.start),
            pvmfw_mem.len().expect("invalid pvmfw region"),
            MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
        ));
    }

    // swap_remove above may have disordered the list; present it in ascending address order.
    regions.sort_unstable_by_key(|(addr, _, _)| *addr);

    // Log the final guest memory layout for debugging.
    for (addr, size, options) in &regions {
        debug!(
            "{:#018x}-{:#018x} {:?}",
            addr.offset(),
            addr.offset() + size - 1,
            options.purpose,
        );
    }

    regions
}
996
997impl arch::LinuxArch for X8664arch {
998    type Error = Error;
999    type ArchMemoryLayout = ArchMemoryLayout;
1000
1001    fn arch_memory_layout(
1002        components: &VmComponents,
1003    ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
1004        create_arch_memory_layout(
1005            &components.pci_config,
1006            components.hv_cfg.protection_type.runs_firmware(),
1007        )
1008    }
1009
1010    fn guest_memory_layout(
1011        components: &VmComponents,
1012        arch_memory_layout: &Self::ArchMemoryLayout,
1013        _hypervisor: &impl Hypervisor,
1014    ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
1015        let bios_size = match &components.vm_image {
1016            VmImage::Bios(bios_file) => Some(bios_file.metadata().map_err(Error::LoadBios)?.len()),
1017            VmImage::Kernel(_) => None,
1018        };
1019
1020        Ok(arch_memory_regions(
1021            arch_memory_layout,
1022            components.memory_size,
1023            bios_size,
1024        ))
1025    }
1026
1027    fn get_system_allocator_config<V: Vm>(
1028        vm: &V,
1029        arch_memory_layout: &Self::ArchMemoryLayout,
1030    ) -> SystemAllocatorConfig {
1031        SystemAllocatorConfig {
1032            io: Some(AddressRange {
1033                start: 0xc000,
1034                end: 0xffff,
1035            }),
1036            low_mmio: arch_memory_layout.pci_mmio_before_32bit,
1037            high_mmio: Self::get_high_mmio_range(vm, arch_memory_layout),
1038            platform_mmio: None,
1039            first_irq: X86_64_IRQ_BASE,
1040        }
1041    }
1042
    // Assembles the full x86_64 VM: buses, PCI topology, legacy devices, ACPI tables,
    // and per-vcpu boot register state, then returns the runnable VM description.
    fn build_vm<V, Vcpu>(
        mut components: VmComponents,
        arch_memory_layout: &Self::ArchMemoryLayout,
        vm_evt_wrtube: &SendTube,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        battery: (Option<BatteryType>, Option<Minijail>),
        mut vm: V,
        ramoops_region: Option<arch::pstore::RamoopsRegion>,
        devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: &mut dyn IrqChipX86_64,
        vcpu_ids: &mut Vec<usize>,
        dump_device_tree_blob: Option<PathBuf>,
        debugcon_jail: Option<Minijail>,
        pflash_jail: Option<Minijail>,
        fw_cfg_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
        guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
        device_tree_overlays: Vec<DtbOverlay>,
        _fdt_position: Option<FdtPosition>,
        _no_pmu: bool,
    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
    where
        V: VmX86_64,
        Vcpu: VcpuX86_64,
    {
        let mem = vm.get_memory().clone();

        let vcpu_count = components.vcpu_count;

        // Tell the hypervisor where to place the identity map and TSS pages.
        vm.set_identity_map_addr(identity_map_addr_start())
            .map_err(Error::SetIdentityMapAddr)?;

        vm.set_tss_addr(tss_addr_start())
            .map_err(Error::SetTssAddr)?;

        // Use IRQ info in ACPI if provided by the user.
        let mut mptable = true;
        let mut sci_irq = X86_64_SCI_IRQ;

        // punch pcie config mmio from pci low mmio, so that it couldn't be
        // allocated to any device.
        let pcie_cfg_mmio_range = arch_memory_layout.pcie_cfg_mmio;
        system_allocator
            .reserve_mmio(pcie_cfg_mmio_range)
            .map_err(Error::ReservePcieCfgMmio)?;

        // A user-supplied FADT disables the built-in MP table and overrides the SCI IRQ.
        for sdt in components.acpi_sdts.iter() {
            if sdt.is_signature(b"FACP") {
                mptable = false;
                let sci_irq_fadt: u16 = sdt.read(acpi::FADT_FIELD_SCI_INTERRUPT);
                sci_irq = sci_irq_fadt.into();
                if !system_allocator.reserve_irq(sci_irq) {
                    warn!("sci irq {} already reserved.", sci_irq);
                }
            }
        }

        let pcie_vcfg_range = Self::get_pcie_vcfg_mmio_range(&mem, &pcie_cfg_mmio_range);
        let mmio_bus = Arc::new(Bus::new(BusType::Mmio));
        let io_bus = Arc::new(Bus::new(BusType::Io));
        let hypercall_bus = Arc::new(Bus::new(BusType::Hypercall));

        // Split the supplied devices into PCI devices and everything else.
        let (pci_devices, _devs): (Vec<_>, Vec<_>) = devs
            .into_iter()
            .partition(|(dev, _)| dev.as_pci_device().is_some());

        let pci_devices = pci_devices
            .into_iter()
            .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
            .collect();

        let (pci, pci_irqs, pid_debug_label_map, amls, gpe_scope_amls) = arch::generate_pci_root(
            pci_devices,
            irq_chip.as_irq_chip_mut(),
            mmio_bus.clone(),
            GuestAddress(pcie_cfg_mmio_range.start),
            12,
            io_bus.clone(),
            system_allocator,
            &mut vm,
            4, // Share the four pin interrupts (INTx#)
            Some(pcie_vcfg_range.start),
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(Error::CreatePciRoot)?;

        let pci = Arc::new(Mutex::new(pci));
        pci.lock().enable_pcie_cfg_mmio(pcie_cfg_mmio_range.start);
        // Legacy PCI configuration access via I/O ports 0xcf8-0xcff.
        let pci_cfg = PciConfigIo::new(
            pci.clone(),
            components.break_linux_pci_config_io,
            vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
        );
        let pci_bus = Arc::new(Mutex::new(pci_cfg));
        io_bus.insert(pci_bus, 0xcf8, 0x8).unwrap();

        // PCIe enhanced configuration access via MMIO.
        let pcie_cfg_mmio = Arc::new(Mutex::new(PciConfigMmio::new(pci.clone(), 12)));
        let pcie_cfg_mmio_len = pcie_cfg_mmio_range.len().unwrap();
        mmio_bus
            .insert(pcie_cfg_mmio, pcie_cfg_mmio_range.start, pcie_cfg_mmio_len)
            .unwrap();

        let pcie_vcfg_mmio = Arc::new(Mutex::new(PciVirtualConfigMmio::new(pci.clone(), 13)));
        mmio_bus
            .insert(
                pcie_vcfg_mmio,
                pcie_vcfg_range.start,
                pcie_vcfg_range.len().unwrap(),
            )
            .unwrap();

        // Event used to notify crosvm that guest OS is trying to suspend.
        let (suspend_tube_send, suspend_tube_recv) =
            Tube::directional_pair().map_err(Error::CreateTube)?;
        let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));

        if components.fw_cfg_enable {
            Self::setup_fw_cfg_device(
                &io_bus,
                components.fw_cfg_parameters.clone(),
                components.bootorder_fw_cfg_blob.clone(),
                fw_cfg_jail,
                #[cfg(feature = "swap")]
                swap_controller,
            )?;
        }

        if !components.no_i8042 {
            Self::setup_legacy_i8042_device(
                &io_bus,
                irq_chip.pit_uses_speaker_port(),
                vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
            )?;
        }
        // The CMOS/RTC device hands back a host-side tube for VM requests, if enabled.
        let mut vm_request_tube = if !components.no_rtc {
            let (host_tube, device_tube) = Tube::pair()
                .context("create tube")
                .map_err(Error::SetupCmos)?;
            Self::setup_legacy_cmos_device(
                arch_memory_layout,
                &io_bus,
                irq_chip,
                device_tube,
                components.memory_size,
            )
            .map_err(Error::SetupCmos)?;
            Some(host_tube)
        } else {
            None
        };
        let serial_devices = Self::setup_serial_devices(
            components.hv_cfg.protection_type,
            irq_chip.as_irq_chip_mut(),
            &io_bus,
            serial_parameters,
            serial_jail,
            #[cfg(feature = "swap")]
            swap_controller,
        )?;
        Self::setup_debugcon_devices(
            components.hv_cfg.protection_type,
            &io_bus,
            serial_parameters,
            debugcon_jail,
            #[cfg(feature = "swap")]
            swap_controller,
        )?;

        let bios_size = if let VmImage::Bios(ref bios) = components.vm_image {
            bios.metadata().map_err(Error::LoadBios)?.len()
        } else {
            0
        };
        if let Some(pflash_image) = components.pflash_image {
            Self::setup_pflash(
                pflash_image,
                components.pflash_block_size,
                bios_size,
                &mmio_bus,
                pflash_jail,
                #[cfg(feature = "swap")]
                swap_controller,
            )?;
        }

        // Functions that use/create jails MUST be used before the call to
        // setup_acpi_devices below, as this move us into a multiprocessing state
        // from which we can no longer fork.

        let mut resume_notify_devices = Vec::new();

        // each bus occupy 1MB mmio for pcie enhanced configuration
        let max_bus = (pcie_cfg_mmio_len / 0x100000 - 1) as u8;
        let (mut acpi_dev_resource, bat_control) = Self::setup_acpi_devices(
            arch_memory_layout,
            pci.clone(),
            &mem,
            &io_bus,
            system_allocator,
            suspend_tube_send.clone(),
            vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
            components.acpi_sdts,
            irq_chip.as_irq_chip_mut(),
            sci_irq,
            battery,
            &mmio_bus,
            max_bus,
            &mut resume_notify_devices,
            #[cfg(feature = "swap")]
            swap_controller,
            #[cfg(any(target_os = "android", target_os = "linux"))]
            components.ac_adapter,
            guest_suspended_cvar,
            &pci_irqs,
        )?;

        // Create customized SSDT table
        let sdt = acpi::create_customize_ssdt(pci.clone(), amls, gpe_scope_amls);
        if let Some(sdt) = sdt {
            acpi_dev_resource.sdts.push(sdt);
        }

        irq_chip
            .finalize_devices(system_allocator, &io_bus, &mmio_bus)
            .map_err(Error::RegisterIrqfd)?;

        // All of these bios generated tables are set manually for the benefit of the kernel boot
        // flow (since there's no BIOS to set it) and for the BIOS boot flow since crosvm doesn't
        // have a way to pass the BIOS these configs.
        // This works right now because the only guest BIOS used with crosvm (u-boot) ignores these
        // tables and the guest OS picks them up.
        // If another guest does need a way to pass these tables down to it's BIOS, this approach
        // should be rethought.

        // Make sure the `vcpu_count` casts below and the arithmetic in `setup_mptable` are well
        // defined.
        if vcpu_count >= u8::MAX.into() {
            return Err(Error::TooManyVcpus);
        }

        if mptable {
            mptable::setup_mptable(&mem, vcpu_count as u8, &pci_irqs)
                .map_err(Error::SetupMptable)?;
        }
        smbios::setup_smbios(&mem, &components.smbios, bios_size).map_err(Error::SetupSmbios)?;

        let host_cpus = if components.host_cpu_topology {
            components.vcpu_affinity.clone()
        } else {
            None
        };

        // TODO (tjeznach) Write RSDP to bootconfig before writing to memory
        acpi::create_acpi_tables(
            &mem,
            vcpu_count as u8,
            sci_irq,
            0xcf9,
            6, // RST_CPU|SYS_RST
            &acpi_dev_resource,
            host_cpus,
            vcpu_ids,
            &pci_irqs,
            pcie_cfg_mmio_range.start,
            max_bus,
            components.force_s2idle,
        )
        .ok_or(Error::CreateAcpi)?;

        // Assemble the kernel command line: base + serial + user extras + ramoops.
        let mut cmdline = Self::get_base_linux_cmdline();

        get_serial_cmdline(&mut cmdline, serial_parameters, "io", &serial_devices)
            .map_err(Error::GetSerialCmdline)?;

        for param in components.extra_kernel_params {
            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
        }

        if let Some(ramoops_region) = ramoops_region {
            arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
                .map_err(Error::Cmdline)?;
        }

        let pci_start = arch_memory_layout.pci_mmio_before_32bit.start;

        let mut vcpu_init = vec![VcpuInitX86_64::default(); vcpu_count];
        let mut msrs = BTreeMap::new();

        let protection_type = components.hv_cfg.protection_type;

        // Load the guest image and set up the bootstrap vcpu's register state accordingly.
        match components.vm_image {
            VmImage::Bios(ref mut bios) => {
                if protection_type.runs_firmware() {
                    return Err(Error::PvmFwBiosUnsupported);
                }

                // Allow a bios to hardcode CMDLINE_OFFSET and read the kernel command line from it.
                Self::load_cmdline(
                    &mem,
                    GuestAddress(CMDLINE_OFFSET),
                    cmdline,
                    CMDLINE_MAX_SIZE as usize - 1,
                )?;
                Self::load_bios(&mem, bios)?;
                regs::set_default_msrs(&mut msrs);
                // The default values for `Regs` and `Sregs` already set up the reset vector.
            }
            VmImage::Kernel(ref mut kernel_image) => {
                let (params, kernel_region, kernel_entry, mut cpu_mode, kernel_type) =
                    Self::load_kernel(&mem, kernel_image)?;

                info!("Loaded {} kernel", kernel_type);

                Self::setup_system_memory(
                    arch_memory_layout,
                    &mem,
                    cmdline,
                    components.initrd_image,
                    components.android_fstab,
                    kernel_region,
                    params,
                    dump_device_tree_blob,
                    device_tree_overlays,
                    protection_type,
                )?;

                if protection_type.needs_firmware_loaded() {
                    // crosvm loads the pVM firmware image into guest memory itself.
                    arch::load_image(
                        &mem,
                        &mut components
                            .pvm_fw
                            .expect("pvmfw must be available if ProtectionType loads it"),
                        GuestAddress(PROTECTED_VM_FW_START),
                        PROTECTED_VM_FW_MAX_SIZE,
                    )
                    .map_err(Error::LoadCustomPvmFw)?;
                } else if protection_type.runs_firmware() {
                    // Tell the hypervisor to load the pVM firmware.
                    vm.load_protected_vm_firmware(
                        GuestAddress(PROTECTED_VM_FW_START),
                        PROTECTED_VM_FW_MAX_SIZE,
                    )
                    .map_err(Error::LoadPvmFw)?;
                }

                // Choose the initial instruction pointer: firmware start, hypervisor-set,
                // or the kernel entry point.
                let entry_addr = if protection_type.needs_firmware_loaded() {
                    Some(PROTECTED_VM_FW_START)
                } else if protection_type.runs_firmware() {
                    None // Initial RIP value is set by the hypervisor
                } else {
                    Some(kernel_entry.offset())
                };

                if let Some(entry) = entry_addr {
                    vcpu_init[0].regs.rip = entry;
                }

                match kernel_type {
                    KernelType::BzImage | KernelType::Elf => {
                        // Configure the bootstrap VCPU for the Linux/x86 boot protocol.
                        // <https://www.kernel.org/doc/html/latest/x86/boot.html>
                        vcpu_init[0].regs.rsp = BOOT_STACK_POINTER;
                        vcpu_init[0].regs.rsi = ZERO_PAGE_OFFSET;
                    }
                    KernelType::Multiboot => {
                        // Provide Multiboot-compatible bootloader information.
                        vcpu_init[0].regs.rax = MULTIBOOT_BOOTLOADER_MAGIC.into();
                        vcpu_init[0].regs.rbx = MULTIBOOT_INFO_OFFSET;
                    }
                }

                if protection_type.runs_firmware() {
                    // Pass DTB address to pVM firmware. This is redundant with the DTB entry in the
                    // `setup_data` list, but it allows the pVM firmware to know the location of the
                    // DTB without having the `setup_data` region mapped yet.
                    if let Some(fdt_setup_data_addr) = find_setup_data(
                        &mem,
                        GuestAddress(SETUP_DATA_START),
                        GuestAddress(SETUP_DATA_END),
                        SetupDataType::Dtb,
                    ) {
                        vcpu_init[0].regs.rdx =
                            fdt_setup_data_addr.offset() + size_of::<setup_data_hdr>() as u64;
                    }

                    // Pass pVM payload entry address to pVM firmware.
                    // NOTE: this is only for development purposes. An actual pvmfw
                    // implementation should not use this value and should instead receive
                    // the pVM payload start and size info from crosvm as the DTB properties
                    // /config/kernel-address and /config/kernel-size and determine the offset
                    // of the entry point on its own, not trust crosvm to provide it.
                    vcpu_init[0].regs.rdi = kernel_entry.offset();

                    // The pVM firmware itself always starts in 32-bit protected mode
                    // with paging disabled, regardless of the type of payload.
                    cpu_mode = CpuMode::FlatProtectedMode;
                }

                match cpu_mode {
                    CpuMode::LongMode => {
                        regs::set_long_mode_msrs(&mut msrs);

                        // Set up long mode and enable paging.
                        regs::configure_segments_and_sregs(&mem, &mut vcpu_init[0].sregs)
                            .map_err(Error::ConfigureSegments)?;
                        regs::setup_page_tables(&mem, &mut vcpu_init[0].sregs)
                            .map_err(Error::SetupPageTables)?;
                    }
                    CpuMode::FlatProtectedMode => {
                        regs::set_default_msrs(&mut msrs);

                        // Set up 32-bit protected mode with paging disabled.
                        regs::configure_segments_and_sregs_flat32(&mem, &mut vcpu_init[0].sregs)
                            .map_err(Error::ConfigureSegments)?;
                    }
                }

                regs::set_mtrr_msrs(&mut msrs, &vm, pci_start);
            }
        }

        // Initialize MSRs for all VCPUs.
        for vcpu in vcpu_init.iter_mut() {
            vcpu.msrs = msrs.clone();
        }

        let mut vm_request_tubes = Vec::new();
        if let Some(req_tube) = vm_request_tube.take() {
            vm_request_tubes.push(req_tube);
        }

        Ok(RunnableLinuxVm {
            vm,
            vcpu_count,
            vcpus: None,
            vcpu_affinity: components.vcpu_affinity,
            vcpu_init,
            no_smt: components.no_smt,
            irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
            hypercall_bus,
            io_bus,
            mmio_bus,
            pid_debug_label_map,
            suspend_tube: (suspend_tube_send, suspend_tube_recv),
            resume_notify_devices,
            rt_cpus: components.rt_cpus,
            delay_rt: components.delay_rt,
            bat_control,
            pm: Some(acpi_dev_resource.pm),
            root_config: pci,
            #[cfg(any(target_os = "android", target_os = "linux"))]
            platform_devices: Vec::new(),
            hotplug_bus: BTreeMap::new(),
            devices_thread: None,
            vm_request_tubes,
        })
    }
1503
1504    fn configure_vcpu<V: Vm>(
1505        vm: &V,
1506        hypervisor: &dyn HypervisorX86_64,
1507        irq_chip: &mut dyn IrqChipX86_64,
1508        vcpu: &mut dyn VcpuX86_64,
1509        vcpu_init: VcpuInitX86_64,
1510        vcpu_id: usize,
1511        num_cpus: usize,
1512        cpu_config: Option<CpuConfigX86_64>,
1513    ) -> Result<()> {
1514        let cpu_config = match cpu_config {
1515            Some(config) => config,
1516            None => return Err(Error::InvalidCpuConfig),
1517        };
1518        if !vm.check_capability(VmCap::EarlyInitCpuid) {
1519            cpuid::setup_cpuid(hypervisor, irq_chip, vcpu, vcpu_id, num_cpus, cpu_config)
1520                .map_err(Error::SetupCpuid)?;
1521        }
1522
1523        vcpu.set_regs(&vcpu_init.regs).map_err(Error::WriteRegs)?;
1524
1525        vcpu.set_sregs(&vcpu_init.sregs)
1526            .map_err(Error::SetupSregs)?;
1527
1528        vcpu.set_fpu(&vcpu_init.fpu).map_err(Error::SetupFpu)?;
1529
1530        let vcpu_supported_var_mtrrs = regs::vcpu_supported_variable_mtrrs(vcpu);
1531        let num_var_mtrrs = regs::count_variable_mtrrs(&vcpu_init.msrs);
1532        let skip_mtrr_msrs = if num_var_mtrrs > vcpu_supported_var_mtrrs {
1533            warn!(
1534                "Too many variable MTRR entries ({} required, {} supported),
1535                please check pci_start addr, guest with pass through device may be very slow",
1536                num_var_mtrrs, vcpu_supported_var_mtrrs,
1537            );
1538            // Filter out the MTRR entries from the MSR list.
1539            true
1540        } else {
1541            false
1542        };
1543
1544        for (msr_index, value) in vcpu_init.msrs.into_iter() {
1545            if skip_mtrr_msrs && regs::is_mtrr_msr(msr_index) {
1546                continue;
1547            }
1548
1549            vcpu.set_msr(msr_index, value).map_err(Error::SetupMsrs)?;
1550        }
1551
1552        interrupts::set_lint(vcpu_id, irq_chip).map_err(Error::SetLint)?;
1553
1554        Ok(())
1555    }
1556
    // Registers a PCI device with a running VM by delegating to the arch-common
    // `configure_pci_device`; only the error mapping is x86_64-specific.
    fn register_pci_device<V: VmX86_64, Vcpu: VcpuX86_64>(
        linux: &mut RunnableLinuxVm<V, Vcpu>,
        device: Box<dyn PciDevice>,
        #[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>,
        resources: &mut SystemAllocator,
        hp_control_tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
    ) -> Result<PciAddress> {
        arch::configure_pci_device(
            linux,
            device,
            #[cfg(any(target_os = "android", target_os = "linux"))]
            minijail,
            resources,
            hp_control_tube,
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(Error::ConfigurePciDevice)
    }
1577
1578    fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
1579        Ok(BTreeMap::new())
1580    }
1581
1582    fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
1583        Ok(BTreeMap::new())
1584    }
1585
1586    fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
1587        Ok(BTreeMap::new())
1588    }
1589
1590    fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
1591        Ok(Vec::new())
1592    }
1593}
1594
// OSC returned status register in CDW1
const OSC_STATUS_UNSUPPORT_UUID: u32 = 0x4;
// pci host bridge OSC returned control register in CDW3
// Each bit corresponds to one feature the OS may be granted control over; see the
// `PciRootOSC` AML below, which masks SHPC_HP and AER out of the granted set.
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_HP: u32 = 0x1;
const PCI_HB_OSC_CONTROL_SHPC_HP: u32 = 0x2;
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_PME: u32 = 0x4;
const PCI_HB_OSC_CONTROL_PCIE_AER: u32 = 0x8;
#[allow(dead_code)]
const PCI_HB_OSC_CONTROL_PCIE_CAP: u32 = 0x10;
1606
1607struct PciRootOSC {}
1608
// Method (_OSC, 4, NotSerialized)  // _OSC: Operating System Capabilities
// {
//     CreateDWordField (Arg3, Zero, CDW1)  // flag and return value
//     If (Arg0 == ToUUID ("33db4d5b-1ff7-401c-9657-7441c03dd766"))
//     {
//         CreateDWordField (Arg3, 8, CDW3) // control field
//         if ( 0 == (CDW1 & 0x01))  // Query flag ?
//         {
//              CDW3 &= !(SHPC_HP | AER)
//         }
//     } Else {
//         CDW1 |= UNSUPPORT_UUID
//     }
//     Return (Arg3)
// }
impl Aml for PciRootOSC {
    // Emits AML bytes equivalent to the ASL `_OSC` method shown in the comment above.
    fn to_aml_bytes(&self, aml: &mut Vec<u8>) {
        let osc_uuid = "33DB4D5B-1FF7-401C-9657-7441C03DD766";
        // virtual pcie root port supports hotplug, pme, and pcie cap register, clear all
        // the other bits.
        let mask = !(PCI_HB_OSC_CONTROL_SHPC_HP | PCI_HB_OSC_CONTROL_PCIE_AER);
        aml::Method::new(
            "_OSC".into(),
            4,
            false,
            vec![
                // CDW1 overlays the first dword of the Arg3 buffer (status/flags).
                &aml::CreateDWordField::new(
                    &aml::Name::new_field_name("CDW1"),
                    &aml::Arg(3),
                    &aml::ZERO,
                ),
                &aml::If::new(
                    &aml::Equal::new(&aml::Arg(0), &aml::Uuid::new(osc_uuid)),
                    vec![
                        // CDW3 (the control field) lives at byte offset 8 of Arg3.
                        &aml::CreateDWordField::new(
                            &aml::Name::new_field_name("CDW3"),
                            &aml::Arg(3),
                            &(8_u8),
                        ),
                        // When the query flag (CDW1 bit 0) is clear, mask SHPC hotplug and
                        // AER out of the control bits granted to the OS.
                        &aml::If::new(
                            &aml::Equal::new(
                                &aml::ZERO,
                                &aml::And::new(
                                    &aml::ZERO,
                                    &aml::Name::new_field_name("CDW1"),
                                    &aml::ONE,
                                ),
                            ),
                            vec![&aml::And::new(
                                &aml::Name::new_field_name("CDW3"),
                                &mask,
                                &aml::Name::new_field_name("CDW3"),
                            )],
                        ),
                    ],
                ),
                // Unknown UUID: report it in the CDW1 status bits.
                &aml::Else::new(vec![&aml::Or::new(
                    &aml::Name::new_field_name("CDW1"),
                    &OSC_STATUS_UNSUPPORT_UUID,
                    &aml::Name::new_field_name("CDW1"),
                )]),
                &aml::Return::new(&aml::Arg(3)),
            ],
        )
        .to_aml_bytes(aml)
    }
}
1676
/// Initial CPU execution mode the loaded kernel image expects at its entry
/// point.
pub enum CpuMode {
    /// 32-bit protected mode with paging disabled.
    FlatProtectedMode,

    /// 64-bit long mode.
    LongMode,
}
1684
/// Format of the kernel image detected by `load_kernel`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum KernelType {
    /// Linux bzImage (x86 boot protocol).
    BzImage,
    /// Plain ELF executable.
    Elf,
    /// Image carrying a Multiboot header.
    Multiboot,
}
1691
1692impl fmt::Display for KernelType {
1693    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1694        match self {
1695            KernelType::BzImage => write!(f, "bzImage"),
1696            KernelType::Elf => write!(f, "ELF"),
1697            KernelType::Multiboot => write!(f, "Multiboot"),
1698        }
1699    }
1700}
1701
1702impl X8664arch {
1703    /// Loads the bios from an open file.
1704    ///
1705    /// # Arguments
1706    ///
1707    /// * `mem` - The memory to be used by the guest.
1708    /// * `bios_image` - the File object for the specified bios
1709    fn load_bios(mem: &GuestMemory, bios_image: &mut File) -> Result<()> {
1710        let bios_image_length = bios_image.get_len().map_err(Error::LoadBios)?;
1711        if bios_image_length >= FIRST_ADDR_PAST_32BITS {
1712            return Err(Error::LoadBios(io::Error::new(
1713                io::ErrorKind::InvalidData,
1714                format!(
1715                    "bios was {bios_image_length} bytes, expected less than {FIRST_ADDR_PAST_32BITS}",
1716                ),
1717            )));
1718        }
1719
1720        let guest_slice = mem
1721            .get_slice_at_addr(bios_start(bios_image_length), bios_image_length as usize)
1722            .map_err(Error::SetupGuestMemory)?;
1723        bios_image
1724            .read_exact_at_volatile(guest_slice, 0)
1725            .map_err(Error::LoadBios)?;
1726        Ok(())
1727    }
1728
    /// Maps a pflash (parallel flash) image into the guest address space,
    /// placed directly below the BIOS region at the top of the 32-bit
    /// address space.
    ///
    /// # Arguments
    ///
    /// * `pflash_image` - backing file for the flash contents.
    /// * `block_size` - flash block size in bytes, passed through to `Pflash`.
    /// * `bios_size` - size of the BIOS mapped below 4 GiB; pflash is placed
    ///   immediately below it.
    /// * `mmio_bus` - the MMIO bus the device is registered on.
    /// * `jail` - optional minijail sandbox for the device (Linux/Android only).
    fn setup_pflash(
        pflash_image: File,
        block_size: u32,
        bios_size: u64,
        mmio_bus: &Bus,
        jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
    ) -> Result<()> {
        let size = pflash_image.metadata().map_err(Error::LoadPflash)?.len();
        // Place the flash so it ends exactly where the BIOS begins.
        let start = FIRST_ADDR_PAST_32BITS - bios_size - size;
        let pflash_image = Box::new(pflash_image);

        // Grab the raw descriptors before the image is moved into the device,
        // so they can be handed to the jailed proxy process below.
        #[cfg(any(target_os = "android", target_os = "linux"))]
        let fds = pflash_image.as_raw_descriptors();

        let pflash = Pflash::new(pflash_image, block_size).map_err(Error::SetupPflash)?;
        let pflash: Arc<Mutex<dyn BusDevice>> = match jail {
            #[cfg(any(target_os = "android", target_os = "linux"))]
            Some(jail) => Arc::new(Mutex::new(
                ProxyDevice::new(
                    pflash,
                    jail,
                    fds,
                    #[cfg(feature = "swap")]
                    swap_controller,
                )
                .map_err(Error::CreateProxyDevice)?,
            )),
            // Device jailing is not supported on Windows.
            #[cfg(windows)]
            Some(_) => unreachable!(),
            None => Arc::new(Mutex::new(pflash)),
        };
        mmio_bus
            .insert(pflash, start, size)
            .map_err(Error::InsertBus)?;

        Ok(())
    }
1767
1768    /// Writes the command line string to the given memory slice.
1769    ///
1770    /// # Arguments
1771    ///
1772    /// * `guest_mem` - A u8 slice that will be partially overwritten by the command line.
1773    /// * `guest_addr` - The address in `guest_mem` at which to load the command line.
1774    /// * `cmdline` - The kernel command line.
1775    /// * `kernel_max_cmdline_len` - The maximum command line length (without NUL terminator)
1776    ///   supported by the kernel.
1777    fn load_cmdline(
1778        guest_mem: &GuestMemory,
1779        guest_addr: GuestAddress,
1780        cmdline: kernel_cmdline::Cmdline,
1781        kernel_max_cmdline_len: usize,
1782    ) -> Result<()> {
1783        let mut cmdline_guest_mem_slice = guest_mem
1784            .get_slice_at_addr(guest_addr, CMDLINE_MAX_SIZE as usize)
1785            .map_err(|_| Error::CommandLineOverflow)?;
1786
1787        let mut cmdline_bytes: Vec<u8> = cmdline
1788            .into_bytes_with_max_len(kernel_max_cmdline_len)
1789            .map_err(Error::Cmdline)?;
1790        cmdline_bytes.push(0u8); // Add NUL terminator.
1791
1792        cmdline_guest_mem_slice
1793            .write_all(&cmdline_bytes)
1794            .map_err(|_| Error::CommandLineOverflow)?;
1795
1796        Ok(())
1797    }
1798
1799    /// Loads the kernel from an open file.
1800    ///
1801    /// # Arguments
1802    ///
1803    /// * `mem` - The memory to be used by the guest.
1804    /// * `kernel_image` - the File object for the specified kernel.
1805    ///
1806    /// # Returns
1807    ///
1808    /// On success, returns the Linux x86_64 boot protocol parameters, the address range containing
1809    /// the kernel, the entry point (initial `RIP` value), the initial CPU mode, and the type of
1810    /// kernel.
1811    fn load_kernel(
1812        mem: &GuestMemory,
1813        kernel_image: &mut File,
1814    ) -> Result<(boot_params, AddressRange, GuestAddress, CpuMode, KernelType)> {
1815        let kernel_start = GuestAddress(KERNEL_START_OFFSET);
1816
1817        let multiboot =
1818            kernel_loader::multiboot_header_from_file(kernel_image).map_err(Error::LoadKernel)?;
1819
1820        if let Some(multiboot_load) = multiboot.as_ref().and_then(|m| m.load.as_ref()) {
1821            let loaded_kernel = kernel_loader::load_multiboot(mem, kernel_image, multiboot_load)
1822                .map_err(Error::LoadKernel)?;
1823
1824            let boot_params = boot_params {
1825                hdr: setup_header {
1826                    cmdline_size: CMDLINE_MAX_SIZE as u32 - 1,
1827                    ..Default::default()
1828                },
1829                ..Default::default()
1830            };
1831            return Ok((
1832                boot_params,
1833                loaded_kernel.address_range,
1834                loaded_kernel.entry,
1835                CpuMode::FlatProtectedMode,
1836                KernelType::Multiboot,
1837            ));
1838        }
1839
1840        match kernel_loader::load_elf(mem, kernel_start, kernel_image, 0) {
1841            Ok(loaded_kernel) => {
1842                // ELF kernels don't contain a `boot_params` structure, so synthesize a default one.
1843                let boot_params = boot_params {
1844                    hdr: setup_header {
1845                        cmdline_size: CMDLINE_MAX_SIZE as u32 - 1,
1846                        ..Default::default()
1847                    },
1848                    ..Default::default()
1849                };
1850                Ok((
1851                    boot_params,
1852                    loaded_kernel.address_range,
1853                    loaded_kernel.entry,
1854                    match loaded_kernel.class {
1855                        kernel_loader::ElfClass::ElfClass32 => CpuMode::FlatProtectedMode,
1856                        kernel_loader::ElfClass::ElfClass64 => CpuMode::LongMode,
1857                    },
1858                    KernelType::Elf,
1859                ))
1860            }
1861            Err(kernel_loader::Error::InvalidMagicNumber) => {
1862                // The image failed to parse as ELF, so try to load it as a bzImage.
1863                let (boot_params, bzimage_region, bzimage_entry, cpu_mode) =
1864                    bzimage::load_bzimage(mem, kernel_start, kernel_image)
1865                        .map_err(Error::LoadBzImage)?;
1866                Ok((
1867                    boot_params,
1868                    bzimage_region,
1869                    bzimage_entry,
1870                    cpu_mode,
1871                    KernelType::BzImage,
1872                ))
1873            }
1874            Err(e) => Err(Error::LoadKernel(e)),
1875        }
1876    }
1877
    /// Configures the system memory space should be called once per vm before
    /// starting vcpu threads.
    ///
    /// Writes the command line, optional initrd, `setup_data` entries
    /// (RNG seed and optional device tree), and the `boot_params`/multiboot
    /// info structures into guest memory.
    ///
    /// # Arguments
    ///
    /// * `mem` - The memory to be used by the guest.
    /// * `cmdline` - the kernel commandline
    /// * `initrd_file` - an initial ramdisk image
    pub fn setup_system_memory(
        arch_memory_layout: &ArchMemoryLayout,
        mem: &GuestMemory,
        cmdline: kernel_cmdline::Cmdline,
        initrd_file: Option<File>,
        android_fstab: Option<File>,
        kernel_region: AddressRange,
        params: boot_params,
        dump_device_tree_blob: Option<PathBuf>,
        device_tree_overlays: Vec<DtbOverlay>,
        protection_type: ProtectionType,
    ) -> Result<()> {
        let e820_entries = generate_e820_memory_map(arch_memory_layout, mem)?;

        let kernel_max_cmdline_len = if params.hdr.cmdline_size == 0 {
            // Old kernels have a maximum length of 255 bytes, not including the NUL.
            255
        } else {
            params.hdr.cmdline_size as usize
        };
        debug!("kernel_max_cmdline_len={kernel_max_cmdline_len}");
        Self::load_cmdline(
            mem,
            GuestAddress(CMDLINE_OFFSET),
            cmdline,
            kernel_max_cmdline_len,
        )?;

        let initrd = match initrd_file {
            Some(mut initrd_file) => {
                // Determine the highest address at which the kernel allows the
                // initrd to be placed.
                let initrd_addr_max = if params.hdr.xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G != 0 {
                    u64::MAX
                } else if params.hdr.initrd_addr_max == 0 {
                    // Default initrd_addr_max for old kernels (see Documentation/x86/boot.txt).
                    0x37FFFFFF
                } else {
                    u64::from(params.hdr.initrd_addr_max)
                };

                let (initrd_start, initrd_size) = arch::load_image_high(
                    mem,
                    &mut initrd_file,
                    GuestAddress(kernel_region.end + 1),
                    GuestAddress(initrd_addr_max),
                    // The initrd must not land in firmware-reserved memory.
                    Some(|region| {
                        region.options.purpose != MemoryRegionPurpose::ProtectedFirmwareRegion
                    }),
                    base::pagesize() as u64,
                )
                .map_err(Error::LoadInitrd)?;
                Some((initrd_start, initrd_size))
            }
            None => None,
        };

        let mut setup_data_entries =
            SetupDataEntries::new(SETUP_DATA_START as usize, SETUP_DATA_END as usize);

        let setup_data_size = setup_data_entries.insert(setup_data_rng_seed());

        // SETUP_DTB should be the last one in SETUP_DATA.
        // This is to reserve enough space for SETUP_DTB
        // without exceeding the size of SETUP_DATA area.
        if android_fstab.is_some()
            || !device_tree_overlays.is_empty()
            || protection_type.runs_firmware()
        {
            let fdt_max_size = min(X86_64_FDT_MAX_SIZE as usize, setup_data_size);
            let mut device_tree_blob = fdt::create_fdt(
                mem,
                android_fstab,
                dump_device_tree_blob,
                device_tree_overlays,
                kernel_region,
                initrd,
            )
            .map_err(Error::CreateFdt)?;
            if device_tree_blob.len() > fdt_max_size {
                return Err(Error::DTBSizeGreaterThanAllowed);
            }

            // Reserve and zero fill dtb memory to maximum allowable size
            // so that pvmfw could patch and extend the dtb in-place.
            device_tree_blob.resize(fdt_max_size, 0);

            setup_data_entries.insert(SetupData {
                data: device_tree_blob,
                type_: SetupDataType::Dtb,
            });
        }

        let setup_data = setup_data_entries.write_setup_data(mem)?;

        configure_boot_params(
            mem,
            GuestAddress(CMDLINE_OFFSET),
            setup_data,
            initrd,
            params,
            &e820_entries,
        )?;

        // Also emit multiboot info so Multiboot kernels can find the memory
        // map and command line.
        configure_multiboot_info(mem, GuestAddress(CMDLINE_OFFSET), &e820_entries)?;

        Ok(())
    }
1992
1993    fn get_pcie_vcfg_mmio_range(mem: &GuestMemory, pcie_cfg_mmio: &AddressRange) -> AddressRange {
1994        // Put PCIe VCFG region at a 2MB boundary after physical memory or 4gb, whichever is
1995        // greater.
1996        let ram_end_round_2mb = mem.end_addr().offset().next_multiple_of(2 * MB);
1997        let start = std::cmp::max(ram_end_round_2mb, 4 * GB);
1998        // Each pci device's ECAM size is 4kb and its vcfg size is 8kb
1999        let end = start + pcie_cfg_mmio.len().unwrap() * 2 - 1;
2000        AddressRange { start, end }
2001    }
2002
2003    /// Returns the high mmio range
2004    fn get_high_mmio_range<V: Vm>(vm: &V, arch_memory_layout: &ArchMemoryLayout) -> AddressRange {
2005        let mem = vm.get_memory();
2006        let start = Self::get_pcie_vcfg_mmio_range(mem, &arch_memory_layout.pcie_cfg_mmio).end + 1;
2007
2008        let phys_mem_end = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
2009        let high_mmio_end = std::cmp::min(phys_mem_end, HIGH_MMIO_MAX_END);
2010
2011        AddressRange {
2012            start,
2013            end: high_mmio_end,
2014        }
2015    }
2016
2017    /// This returns a minimal kernel command for this architecture
2018    pub fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
2019        let mut cmdline = kernel_cmdline::Cmdline::new();
2020        cmdline.insert_str("panic=-1").unwrap();
2021
2022        cmdline
2023    }
2024
    /// Sets up fw_cfg device.
    ///  # Arguments
    ///
    /// * `io_bus` - the IO bus object
    /// * `fw_cfg_parameters` - command-line specified data to add to device. May contain all None
    ///   fields if user did not specify data to add to the device
    /// * `bootorder_fw_cfg_blob` - NUL-terminated boot order blob; only added
    ///   as a "bootorder" file when it contains more than the terminator.
    /// * `fw_cfg_jail` - optional minijail sandbox for the device
    ///   (Linux/Android only).
    fn setup_fw_cfg_device(
        io_bus: &Bus,
        fw_cfg_parameters: Vec<FwCfgParameters>,
        bootorder_fw_cfg_blob: Vec<u8>,
        fw_cfg_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
    ) -> Result<()> {
        let fw_cfg = match devices::FwCfgDevice::new(FW_CFG_MAX_FILE_SLOTS, fw_cfg_parameters) {
            Ok(mut device) => {
                // this condition will only be true if the user specified at least one bootindex
                // option on the command line. If none were specified, bootorder_fw_cfg_blob will
                // only have a null byte (null terminator)
                if bootorder_fw_cfg_blob.len() > 1 {
                    // Add boot order file to the device. If the file is not present, firmware may
                    // not be able to boot.
                    if let Err(err) = device.add_file(
                        "bootorder",
                        bootorder_fw_cfg_blob,
                        devices::FwCfgItemType::GenericItem,
                    ) {
                        return Err(Error::CreateFwCfgDevice(err));
                    }
                }
                device
            }
            Err(err) => {
                return Err(Error::CreateFwCfgDevice(err));
            }
        };

        // Wrap in a jailed proxy when sandboxing is requested; otherwise run
        // the device in-process.
        let fw_cfg: Arc<Mutex<dyn BusDevice>> = match fw_cfg_jail.as_ref() {
            #[cfg(any(target_os = "android", target_os = "linux"))]
            Some(jail) => {
                let jail_clone = jail.try_clone().map_err(Error::CloneJail)?;
                #[cfg(feature = "seccomp_trace")]
                debug!(
                    "seccomp_trace {{\"event\": \"minijail_clone\", \"src_jail_addr\": \"0x{:x}\", \"dst_jail_addr\": \"0x{:x}\"}}",
                    read_jail_addr(jail),
                    read_jail_addr(&jail_clone)
                );
                Arc::new(Mutex::new(
                    ProxyDevice::new(
                        fw_cfg,
                        jail_clone,
                        Vec::new(),
                        #[cfg(feature = "swap")]
                        swap_controller,
                    )
                    .map_err(Error::CreateProxyDevice)?,
                ))
            }
            // Device jailing is not supported on Windows.
            #[cfg(windows)]
            Some(_) => unreachable!(),
            None => Arc::new(Mutex::new(fw_cfg)),
        };

        io_bus
            .insert(fw_cfg, FW_CFG_BASE_PORT, FW_CFG_WIDTH)
            .map_err(Error::InsertBus)?;

        Ok(())
    }
2093
2094    /// Sets up the legacy x86 i8042/KBD platform device
2095    ///
2096    /// # Arguments
2097    ///
2098    /// * - `io_bus` - the IO bus object
2099    /// * - `pit_uses_speaker_port` - does the PIT use port 0x61 for the PC speaker
2100    /// * - `vm_evt_wrtube` - the event object which should receive exit events
2101    pub fn setup_legacy_i8042_device(
2102        io_bus: &Bus,
2103        pit_uses_speaker_port: bool,
2104        vm_evt_wrtube: SendTube,
2105    ) -> Result<()> {
2106        let i8042 = Arc::new(Mutex::new(devices::I8042Device::new(
2107            vm_evt_wrtube.try_clone().map_err(Error::CloneTube)?,
2108        )));
2109
2110        if pit_uses_speaker_port {
2111            io_bus.insert(i8042, 0x062, 0x3).unwrap();
2112        } else {
2113            io_bus.insert(i8042, 0x061, 0x4).unwrap();
2114        }
2115
2116        Ok(())
2117    }
2118
2119    /// Sets up the legacy x86 CMOS/RTC platform device
2120    /// # Arguments
2121    ///
2122    /// * - `io_bus` - the IO bus object
2123    /// * - `mem_size` - the size in bytes of physical ram for the guest
2124    pub fn setup_legacy_cmos_device(
2125        arch_memory_layout: &ArchMemoryLayout,
2126        io_bus: &Bus,
2127        irq_chip: &mut dyn IrqChipX86_64,
2128        vm_control: Tube,
2129        mem_size: u64,
2130    ) -> anyhow::Result<()> {
2131        let mem_regions = arch_memory_regions(arch_memory_layout, mem_size, None);
2132
2133        let mem_below_4g = mem_regions
2134            .iter()
2135            .filter(|r| r.0.offset() < FIRST_ADDR_PAST_32BITS)
2136            .map(|r| r.1)
2137            .sum();
2138
2139        let mem_above_4g = mem_regions
2140            .iter()
2141            .filter(|r| r.0.offset() >= FIRST_ADDR_PAST_32BITS)
2142            .map(|r| r.1)
2143            .sum();
2144
2145        let irq_evt = devices::IrqEdgeEvent::new().context("cmos irq")?;
2146        let cmos = devices::cmos::Cmos::new(
2147            mem_below_4g,
2148            mem_above_4g,
2149            Utc::now,
2150            vm_control,
2151            irq_evt.try_clone().context("cmos irq clone")?,
2152        )
2153        .context("create cmos")?;
2154
2155        irq_chip
2156            .register_edge_irq_event(
2157                devices::cmos::RTC_IRQ as u32,
2158                &irq_evt,
2159                IrqEventSource::from_device(&cmos),
2160            )
2161            .context("cmos register irq")?;
2162        io_bus
2163            .insert(Arc::new(Mutex::new(cmos)), 0x70, 0x2)
2164            .context("cmos insert irq")?;
2165
2166        Ok(())
2167    }
2168
    /// Sets up the acpi devices for this platform and
    /// return the resources which is used to set the ACPI tables.
    ///
    /// # Arguments
    ///
    /// * `io_bus` the I/O bus to add the devices to
    /// * `resources` the SystemAllocator to allocate IO and MMIO for acpi devices.
    /// * `suspend_tube` the tube object which used to suspend/resume the VM.
    /// * `sdts` ACPI system description tables
    /// * `irq_chip` the IrqChip object for registering irq events
    /// * `battery` indicate whether to create the battery
    /// * `mmio_bus` the MMIO bus to add the devices to
    /// * `pci_irqs` IRQ assignment of PCI devices. Tuples of (PCI address, gsi, PCI interrupt pin).
    ///   Note that this matches one of the return values of generate_pci_root.
    pub fn setup_acpi_devices(
        arch_memory_layout: &ArchMemoryLayout,
        pci_root: Arc<Mutex<PciRoot>>,
        mem: &GuestMemory,
        io_bus: &Bus,
        resources: &mut SystemAllocator,
        suspend_tube: Arc<Mutex<SendTube>>,
        vm_evt_wrtube: SendTube,
        sdts: Vec<SDT>,
        irq_chip: &mut dyn IrqChip,
        sci_irq: u32,
        battery: (Option<BatteryType>, Option<Minijail>),
        #[cfg_attr(windows, allow(unused_variables))] mmio_bus: &Bus,
        max_bus: u8,
        resume_notify_devices: &mut Vec<Arc<Mutex<dyn BusResumeDevice>>>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
        #[cfg(any(target_os = "android", target_os = "linux"))] ac_adapter: bool,
        guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
        pci_irqs: &[(PciAddress, u32, PciInterruptPin)],
    ) -> Result<(acpi::AcpiDevResource, Option<BatControl>)> {
        // The AML data for the acpi devices
        let mut amls = Vec::new();

        // Optional battery device (goldfish battery on Linux/Android).
        let bat_control = if let Some(battery_type) = battery.0 {
            match battery_type {
                #[cfg(any(target_os = "android", target_os = "linux"))]
                BatteryType::Goldfish => {
                    let irq_num = resources.allocate_irq().ok_or(Error::CreateBatDevices(
                        arch::DeviceRegistrationError::AllocateIrq,
                    ))?;
                    let (control_tube, _mmio_base) = arch::sys::linux::add_goldfish_battery(
                        &mut amls,
                        battery.1,
                        mmio_bus,
                        irq_chip,
                        irq_num,
                        resources,
                        #[cfg(feature = "swap")]
                        swap_controller,
                    )
                    .map_err(Error::CreateBatDevices)?;
                    Some(BatControl {
                        type_: BatteryType::Goldfish,
                        control_tube,
                    })
                }
                #[cfg(windows)]
                _ => None,
            }
        } else {
            None
        };

        // Allocate the ACPI PM I/O port base, falling back to the
        // conventional 0x600 when no I/O allocator is available.
        let pm_alloc = resources.get_anon_alloc();
        let pm_iobase = match resources.io_allocator() {
            Some(io) => io
                .allocate_with_align(
                    devices::acpi::ACPIPM_RESOURCE_LEN as u64,
                    pm_alloc,
                    "ACPIPM".to_string(),
                    4, // must be 32-bit aligned
                )
                .map_err(Error::AllocateIOResouce)?,
            None => 0x600,
        };

        // Expose the PCIe VCFG window base to AML as the VCFG name.
        let pcie_vcfg = aml::Name::new(
            "VCFG".into(),
            &Self::get_pcie_vcfg_mmio_range(mem, &arch_memory_layout.pcie_cfg_mmio).start,
        );
        pcie_vcfg.to_aml_bytes(&mut amls);

        let pm_sci_evt = devices::IrqLevelEvent::new().map_err(Error::CreateEvent)?;

        // Optional virtual AC adapter device (Linux/Android only).
        #[cfg(any(target_os = "android", target_os = "linux"))]
        let acdc = if ac_adapter {
            // Allocate GPE for AC adapter notfication
            let gpe = resources.allocate_gpe().ok_or(Error::AllocateGpe)?;

            let alloc = resources.get_anon_alloc();
            let mmio_base = resources
                .allocate_mmio(
                    devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
                    alloc,
                    "AcAdapter".to_string(),
                    resources::AllocOptions::new().align(devices::ac_adapter::ACDC_VIRT_MMIO_SIZE),
                )
                .unwrap();
            let ac_adapter_dev = devices::ac_adapter::AcAdapter::new(mmio_base, gpe);
            let ac_dev = Arc::new(Mutex::new(ac_adapter_dev));
            mmio_bus
                .insert(
                    ac_dev.clone(),
                    mmio_base,
                    devices::ac_adapter::ACDC_VIRT_MMIO_SIZE,
                )
                .unwrap();

            ac_dev.lock().to_aml_bytes(&mut amls);
            Some(ac_dev)
        } else {
            None
        };
        #[cfg(windows)]
        let acdc = None;

        //Virtual PMC
        if let Some(guest_suspended_cvar) = guest_suspended_cvar {
            let alloc = resources.get_anon_alloc();
            let mmio_base = resources
                .allocate_mmio(
                    devices::pmc_virt::VPMC_VIRT_MMIO_SIZE,
                    alloc,
                    "VirtualPmc".to_string(),
                    resources::AllocOptions::new().align(devices::pmc_virt::VPMC_VIRT_MMIO_SIZE),
                )
                .unwrap();

            let pmc_virtio_mmio =
                Arc::new(Mutex::new(VirtualPmc::new(mmio_base, guest_suspended_cvar)));
            mmio_bus
                .insert(
                    pmc_virtio_mmio.clone(),
                    mmio_base,
                    devices::pmc_virt::VPMC_VIRT_MMIO_SIZE,
                )
                .unwrap();
            pmc_virtio_mmio.lock().to_aml_bytes(&mut amls);
        }

        // ACPI PM resource: handles suspend/resume and delivers SCI.
        let mut pmresource = devices::ACPIPMResource::new(
            pm_sci_evt.try_clone().map_err(Error::CloneEvent)?,
            suspend_tube,
            vm_evt_wrtube,
            acdc,
        );
        pmresource.to_aml_bytes(&mut amls);
        irq_chip
            .register_level_irq_event(
                sci_irq,
                &pm_sci_evt,
                IrqEventSource::from_device(&pmresource),
            )
            .map_err(Error::RegisterIrqfd)?;
        pmresource.start();

        // Build the host bridge _CRS: bus number range, the 0xcf8 config
        // ports, and every MMIO pool from the allocator.
        let mut crs_entries: Vec<Box<dyn Aml>> = vec![
            Box::new(aml::AddressSpace::new_bus_number(0x0u16, max_bus as u16)),
            Box::new(aml::IO::new(0xcf8, 0xcf8, 1, 0x8)),
        ];
        for r in resources.mmio_pools() {
            // Use 32-bit descriptors when the range fits; otherwise fall back
            // to 64-bit ones.
            let entry: Box<dyn Aml> = match (u32::try_from(r.start), u32::try_from(r.end)) {
                (Ok(start), Ok(end)) => Box::new(aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    start,
                    end,
                )),
                _ => Box::new(aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    r.start,
                    r.end,
                )),
            };
            crs_entries.push(entry);
        }

        // PCI routing table (_PRT) entries mapping device/pin to GSI.
        let prt_entries: Vec<aml::Package> = pci_irqs
            .iter()
            .map(|(pci_address, gsi, pci_intr_pin)| {
                aml::Package::new(vec![
                    &pci_address.acpi_adr(),
                    &pci_intr_pin.to_mask(),
                    &aml::ZERO,
                    gsi,
                ])
            })
            .collect();

        // PCI host bridge device (PNP0A08/PNP0A03) with _CRS, _OSC and _PRT.
        aml::Device::new(
            "_SB_.PC00".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A08")),
                &aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A03")),
                &aml::Name::new("_ADR".into(), &aml::ZERO),
                &aml::Name::new("_SEG".into(), &aml::ZERO),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("SUPP".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(crs_entries.iter().map(|b| b.as_ref()).collect()),
                ),
                &PciRootOSC {},
                &aml::Name::new(
                    "_PRT".into(),
                    &aml::Package::new(prt_entries.iter().map(|p| p as &dyn Aml).collect()),
                ),
            ],
        )
        .to_aml_bytes(&mut amls);

        // Reserve the MMCFG (ECAM) region via a motherboard resource device
        // when it fits in 32-bit fields.
        if let (Some(start), Some(len)) = (
            u32::try_from(arch_memory_layout.pcie_cfg_mmio.start).ok(),
            arch_memory_layout
                .pcie_cfg_mmio
                .len()
                .and_then(|l| u32::try_from(l).ok()),
        ) {
            aml::Device::new(
                "_SB_.MB00".into(),
                vec![
                    &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                            true, start, len,
                        )]),
                    ),
                ],
            )
            .to_aml_bytes(&mut amls);
        } else {
            warn!("Failed to create ACPI MMCFG region reservation");
        }

        // Emit a device node with a _PRW (wake) entry for every PCI device
        // directly below the root bus that has an ACPI path.
        let root_bus = pci_root.lock().get_root_bus();
        let addresses = root_bus.lock().get_downstream_devices();
        for address in addresses {
            if let Some(acpi_path) = pci_root.lock().acpi_path(&address) {
                const DEEPEST_SLEEP_STATE: u32 = 3;
                aml::Device::new(
                    (*acpi_path).into(),
                    vec![
                        &aml::Name::new("_ADR".into(), &address.acpi_adr()),
                        &aml::Name::new(
                            "_PRW".into(),
                            &aml::Package::new(vec![&PM_WAKEUP_GPIO, &DEEPEST_SLEEP_STATE]),
                        ),
                    ],
                )
                .to_aml_bytes(&mut amls);
            }
        }

        // Register the PM resource on the I/O bus and make sure it is
        // notified on resume.
        let pm = Arc::new(Mutex::new(pmresource));
        io_bus
            .insert(
                pm.clone(),
                pm_iobase,
                devices::acpi::ACPIPM_RESOURCE_LEN as u64,
            )
            .unwrap();
        resume_notify_devices.push(pm.clone());

        Ok((
            acpi::AcpiDevResource {
                amls,
                pm_iobase,
                pm,
                sdts,
            },
            bat_control,
        ))
    }
2448
2449    /// Sets up the serial devices for this platform. Returns a list of configured serial devices.
2450    ///
2451    /// # Arguments
2452    ///
2453    /// * - `irq_chip` the IrqChip object for registering irq events
2454    /// * - `io_bus` the I/O bus to add the devices to
2455    /// * - `serial_parameters` - definitions for how the serial devices should be configured
2456    pub fn setup_serial_devices(
2457        protection_type: ProtectionType,
2458        irq_chip: &mut dyn IrqChip,
2459        io_bus: &Bus,
2460        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
2461        serial_jail: Option<Minijail>,
2462        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
2463    ) -> Result<Vec<SerialDeviceInfo>> {
2464        let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2465        let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
2466
2467        let serial_devices = arch::add_serial_devices(
2468            protection_type,
2469            io_bus,
2470            (X86_64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
2471            (X86_64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
2472            serial_parameters,
2473            serial_jail,
2474            #[cfg(feature = "swap")]
2475            swap_controller,
2476        )
2477        .map_err(Error::CreateSerialDevices)?;
2478
2479        let source = IrqEventSource {
2480            device_id: Serial::device_id(),
2481            queue_id: 0,
2482            device_name: Serial::debug_label(),
2483        };
2484        irq_chip
2485            .register_edge_irq_event(X86_64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
2486            .map_err(Error::RegisterIrqfd)?;
2487        irq_chip
2488            .register_edge_irq_event(X86_64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
2489            .map_err(Error::RegisterIrqfd)?;
2490
2491        Ok(serial_devices)
2492    }
2493
    /// Creates a debugcon device on `io_bus` for every entry in
    /// `serial_parameters` whose hardware type is `SerialHardware::Debugcon`.
    ///
    /// On Linux/Android, if `debugcon_jail` is provided each device is wrapped
    /// in a jailed `ProxyDevice`; otherwise the device is inserted on the bus
    /// directly. Each device occupies one byte of I/O space at the port given
    /// by `param.debugcon_port`.
    fn setup_debugcon_devices(
        protection_type: ProtectionType,
        io_bus: &Bus,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        debugcon_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
    ) -> Result<()> {
        for param in serial_parameters.values() {
            if param.hardware != SerialHardware::Debugcon {
                continue;
            }

            // FDs that must survive into the jailed child (collected by
            // create_serial_device).
            let mut preserved_fds = Vec::new();
            let con = param
                .create_serial_device::<Debugcon>(
                    protection_type,
                    // Debugcon doesn't use the interrupt event
                    &Event::new().map_err(Error::CreateEvent)?,
                    &mut preserved_fds,
                )
                .map_err(Error::CreateDebugconDevice)?;

            let con: Arc<Mutex<dyn BusDevice>> = match debugcon_jail.as_ref() {
                #[cfg(any(target_os = "android", target_os = "linux"))]
                Some(jail) => {
                    // The jail is cloned because this loop may create several
                    // debugcon devices, each needing its own jail instance.
                    let jail_clone = jail.try_clone().map_err(Error::CloneJail)?;
                    #[cfg(feature = "seccomp_trace")]
                    debug!(
                        "seccomp_trace {{\"event\": \"minijail_clone\", \"src_jail_addr\": \"0x{:x}\", \"dst_jail_addr\": \"0x{:x}\"}}",
                        read_jail_addr(jail),
                        read_jail_addr(&jail_clone)
                    );
                    Arc::new(Mutex::new(
                        ProxyDevice::new(
                            con,
                            jail_clone,
                            preserved_fds,
                            #[cfg(feature = "swap")]
                            swap_controller,
                        )
                        .map_err(Error::CreateProxyDevice)?,
                    ))
                }
                // Jails are not supported on Windows, so a Some value cannot
                // occur there.
                #[cfg(windows)]
                Some(_) => unreachable!(),
                None => Arc::new(Mutex::new(con)),
            };
            io_bus
                .insert(con.clone(), param.debugcon_port.into(), 1)
                .map_err(Error::InsertBus)?;
        }

        Ok(())
    }
2548}
2549
2550#[sorted]
2551#[derive(Error, Debug)]
2552pub enum MsrError {
2553    #[error("CPU not support. Only intel CPUs support ITMT.")]
2554    CpuUnSupport,
2555    #[error("msr must be unique: {0}")]
2556    MsrDuplicate(u32),
2557}
2558
/// Errors from probing the host CPU for hybrid-architecture support
/// (see `check_host_hybrid_support`).
#[derive(Error, Debug)]
pub enum HybridSupportError {
    /// The host CPU is missing at least one of the required CPUID hybrid
    /// indicators.
    #[error("Host CPU doesn't support hybrid architecture.")]
    UnsupportedHostCpu,
}
2564
/// The wrapper for CPUID call functions.
///
/// Holding plain function pointers (instead of calling `__cpuid` /
/// `__cpuid_count` directly) lets tests substitute fake implementations.
pub struct CpuIdCall {
    /// __cpuid_count or a fake function for test.
    cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
    /// __cpuid or a fake function for test.
    cpuid: unsafe fn(u32) -> CpuidResult,
}

impl CpuIdCall {
    /// Bundles a `(leaf, subleaf)` CPUID function and a leaf-only CPUID
    /// function into a [`CpuIdCall`].
    pub fn new(
        cpuid_count: unsafe fn(u32, u32) -> CpuidResult,
        cpuid: unsafe fn(u32) -> CpuidResult,
    ) -> CpuIdCall {
        Self { cpuid_count, cpuid }
    }
}
2581
2582/// Check if host supports hybrid CPU feature. The check include:
2583///     1. Check if CPUID.1AH exists. CPUID.1AH is hybrid information enumeration leaf.
2584///     2. Check if CPUID.07H.00H:EDX[bit 15] sets. This bit means the processor is identified as a
2585///        hybrid part.
2586///     3. Check if CPUID.1AH:EAX sets. The hybrid core type is set in EAX.
2587///
2588/// # Arguments
2589///
2590/// * - `cpuid` the wrapped cpuid functions used to get CPUID info.
2591pub fn check_host_hybrid_support(cpuid: &CpuIdCall) -> std::result::Result<(), HybridSupportError> {
2592    // CPUID.0H.EAX returns maximum input value for basic CPUID information.
2593    //
2594    // SAFETY:
2595    // Safe because we pass 0 for this call and the host supports the
2596    // `cpuid` instruction.
2597    let mut cpuid_entry = unsafe { (cpuid.cpuid)(0x0) };
2598    if cpuid_entry.eax < 0x1A {
2599        return Err(HybridSupportError::UnsupportedHostCpu);
2600    }
2601    // SAFETY:
2602    // Safe because we pass 0x7 and 0 for this call and the host supports the
2603    // `cpuid` instruction.
2604    cpuid_entry = unsafe { (cpuid.cpuid_count)(0x7, 0) };
2605    if cpuid_entry.edx & 1 << EDX_HYBRID_CPU_SHIFT == 0 {
2606        return Err(HybridSupportError::UnsupportedHostCpu);
2607    }
2608    // From SDM, if a value entered for CPUID.EAX is less than or equal to the
2609    // maximum input value and the leaf is not supported on that processor then
2610    // 0 is returned in all the registers.
2611    // For the CPU with hybrid support, its CPUID.1AH.EAX shouldn't be zero.
2612    //
2613    // SAFETY:
2614    // Safe because we pass 0 for this call and the host supports the
2615    // `cpuid` instruction.
2616    cpuid_entry = unsafe { (cpuid.cpuid)(0x1A) };
2617    if cpuid_entry.eax == 0 {
2618        return Err(HybridSupportError::UnsupportedHostCpu);
2619    }
2620    Ok(())
2621}
2622
#[cfg(test)]
mod tests {
    use std::mem::size_of;

    use super::*;

    // Shared fixture: a layout with a 256 MB PCIe ECAM (MMCFG) region at 3 GB
    // and the low PCI MMIO region starting at 2 GB. The `false` argument is
    // forwarded to `create_arch_memory_layout`; see that function for its
    // meaning.
    fn setup() -> ArchMemoryLayout {
        let pci_config = PciConfig {
            ecam: Some(MemoryRegionConfig {
                start: 3 * GB,
                size: Some(256 * MB),
            }),
            mem: Some(MemoryRegionConfig {
                start: 2 * GB,
                size: None,
            }),
        };
        create_arch_memory_layout(&pci_config, false).unwrap()
    }

    // 512 MB of RAM, no BIOS: conventional memory below 640 KB, the
    // 640 KB..1 MB range reserved, and the remaining RAM mapped from 1 MB up.
    #[test]
    fn regions_lt_4gb_nobios() {
        let arch_memory_layout = setup();
        let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, /* bios_size */ None);
        assert_eq!(
            regions,
            [
                (
                    GuestAddress(0),
                    640 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(640 * KB),
                    384 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::ReservedMemory,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(1 * MB),
                    512 * MB - 1 * MB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                )
            ]
        );
    }

    // RAM larger than fits below the 2 GB PCI hole, no BIOS: the overflow
    // (2 GB + 0x8000 here) is relocated above the 4 GB boundary.
    #[test]
    fn regions_gt_4gb_nobios() {
        let arch_memory_layout = setup();
        let size = 4 * GB + 0x8000;
        let regions = arch_memory_regions(&arch_memory_layout, size, /* bios_size */ None);
        assert_eq!(
            regions,
            [
                (
                    GuestAddress(0),
                    640 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(640 * KB),
                    384 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::ReservedMemory,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(1 * MB),
                    2 * GB - 1 * MB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(4 * GB),
                    2 * GB + 0x8000,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
            ]
        );
    }

    // Same as regions_lt_4gb_nobios, plus a BIOS region mapped so that it
    // ends exactly at the 4 GB boundary.
    #[test]
    fn regions_lt_4gb_bios() {
        let arch_memory_layout = setup();
        let bios_len = 1 * MB;
        let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, Some(bios_len));
        assert_eq!(
            regions,
            [
                (
                    GuestAddress(0),
                    640 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(640 * KB),
                    384 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::ReservedMemory,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(1 * MB),
                    512 * MB - 1 * MB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(4 * GB - bios_len),
                    bios_len,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::Bios,
                        file_backed: None,
                    },
                ),
            ]
        );
    }

    // RAM spilling above 4 GB together with a BIOS region just below 4 GB:
    // both the BIOS mapping and the high RAM region must be present.
    #[test]
    fn regions_gt_4gb_bios() {
        let arch_memory_layout = setup();
        let bios_len = 1 * MB;
        let regions = arch_memory_regions(&arch_memory_layout, 4 * GB + 0x8000, Some(bios_len));
        assert_eq!(
            regions,
            [
                (
                    GuestAddress(0),
                    640 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(640 * KB),
                    384 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::ReservedMemory,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(1 * MB),
                    2 * GB - 1 * MB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(4 * GB - bios_len),
                    bios_len,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::Bios,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(4 * GB),
                    2 * GB + 0x8000,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
            ]
        );
    }

    // RAM exactly filling the space below the 2 GB PCI hole: nothing should
    // be placed above 4 GB.
    #[test]
    fn regions_eq_4gb_nobios() {
        let arch_memory_layout = setup();
        // Test with exact size of 4GB - the overhead.
        let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, /* bios_size */ None);
        assert_eq!(
            regions,
            [
                (
                    GuestAddress(0),
                    640 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(640 * KB),
                    384 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::ReservedMemory,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(1 * MB),
                    2 * GB - 1 * MB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                )
            ]
        );
    }

    // Same as regions_eq_4gb_nobios, plus the BIOS mapping below 4 GB.
    #[test]
    fn regions_eq_4gb_bios() {
        let arch_memory_layout = setup();
        // Test with exact size of 4GB - the overhead.
        let bios_len = 1 * MB;
        let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, Some(bios_len));
        assert_eq!(
            regions,
            [
                (
                    GuestAddress(0),
                    640 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(640 * KB),
                    384 * KB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::ReservedMemory,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(1 * MB),
                    2 * GB - 1 * MB,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::GuestMemoryRegion,
                        file_backed: None,
                    },
                ),
                (
                    GuestAddress(4 * GB - bios_len),
                    bios_len,
                    MemoryRegionOptions {
                        align: 0,
                        purpose: MemoryRegionPurpose::Bios,
                        file_backed: None,
                    },
                ),
            ]
        );
    }

    // The layout must honor the start addresses and ECAM size requested by
    // the PciConfig passed to setup().
    #[test]
    fn check_pci_mmio_layout() {
        let arch_memory_layout = setup();

        assert_eq!(arch_memory_layout.pci_mmio_before_32bit.start, 2 * GB);
        assert_eq!(arch_memory_layout.pcie_cfg_mmio.start, 3 * GB);
        assert_eq!(arch_memory_layout.pcie_cfg_mmio.len().unwrap(), 256 * MB);
    }

    #[test]
    fn check_32bit_gap_size_alignment() {
        let arch_memory_layout = setup();
        // pci_mmio_before_32bit is 256 MB aligned to be friendly for MTRR mappings.
        assert_eq!(
            arch_memory_layout.pci_mmio_before_32bit.start % (256 * MB),
            0
        );
    }

    // An empty setup_data slice must produce no list head address.
    #[test]
    fn write_setup_data_empty() {
        let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();
        let setup_data = [];
        let setup_data_addr = write_setup_data(
            &mem,
            GuestAddress(0x1000),
            GuestAddress(0x2000),
            &setup_data,
        )
        .expect("write_setup_data");
        assert_eq!(setup_data_addr, None);
    }

    // Two entries must be written as a linked list: entry1's `next` field
    // (u64 at offset 0) points at entry2, and entry2's `next` is 0. The
    // 32-bit length lives at offset 12 and the payload starts at offset 16
    // of each entry.
    #[test]
    fn write_setup_data_two_of_them() {
        let mem = GuestMemory::new(&[(GuestAddress(0), 0x2_0000)]).unwrap();

        let entry1_addr = GuestAddress(0x1000);
        let entry1_next_addr = entry1_addr;
        let entry1_len_addr = entry1_addr.checked_add(12).unwrap();
        let entry1_data_addr = entry1_addr.checked_add(16).unwrap();
        let entry1_data = [0x55u8; 13];
        let entry1_size = (size_of::<setup_data_hdr>() + entry1_data.len()) as u64;
        // entry1's total size is not a multiple of 4, so entry2 is expected
        // at the next aligned offset (3 bytes of padding).
        let entry1_align = 3;

        let entry2_addr = GuestAddress(entry1_addr.offset() + entry1_size + entry1_align);
        let entry2_next_addr = entry2_addr;
        let entry2_len_addr = entry2_addr.checked_add(12).unwrap();
        let entry2_data_addr = entry2_addr.checked_add(16).unwrap();
        let entry2_data = [0xAAu8; 9];

        let setup_data = [
            SetupData {
                data: entry1_data.to_vec(),
                type_: SetupDataType::Dtb,
            },
            SetupData {
                data: entry2_data.to_vec(),
                type_: SetupDataType::Dtb,
            },
        ];

        let setup_data_head_addr = write_setup_data(
            &mem,
            GuestAddress(0x1000),
            GuestAddress(0x2000),
            &setup_data,
        )
        .expect("write_setup_data");
        assert_eq!(setup_data_head_addr, Some(entry1_addr));

        assert_eq!(
            mem.read_obj_from_addr::<u64>(entry1_next_addr).unwrap(),
            entry2_addr.offset()
        );
        assert_eq!(
            mem.read_obj_from_addr::<u32>(entry1_len_addr).unwrap(),
            entry1_data.len() as u32
        );
        assert_eq!(
            mem.read_obj_from_addr::<[u8; 13]>(entry1_data_addr)
                .unwrap(),
            entry1_data
        );

        assert_eq!(mem.read_obj_from_addr::<u64>(entry2_next_addr).unwrap(), 0);
        assert_eq!(
            mem.read_obj_from_addr::<u32>(entry2_len_addr).unwrap(),
            entry2_data.len() as u32
        );
        assert_eq!(
            mem.read_obj_from_addr::<[u8; 9]>(entry2_data_addr).unwrap(),
            entry2_data
        );
    }

    // A cmdline whose NUL-terminated form would run past the end of guest
    // memory must fail with CommandLineOverflow.
    #[test]
    fn cmdline_overflow() {
        const MEM_SIZE: u64 = 0x1000;
        let gm = GuestMemory::new(&[(GuestAddress(0x0), MEM_SIZE)]).unwrap();
        let mut cmdline = kernel_cmdline::Cmdline::new();
        cmdline.insert_str("12345").unwrap();
        let cmdline_address = GuestAddress(MEM_SIZE - 5);
        let err =
            X8664arch::load_cmdline(&gm, cmdline_address, cmdline, CMDLINE_MAX_SIZE as usize - 1)
                .unwrap_err();
        assert!(matches!(err, Error::CommandLineOverflow));
    }

    // load_cmdline must write the command line bytes followed by a
    // terminating NUL.
    #[test]
    fn cmdline_write_end() {
        const MEM_SIZE: u64 = 0x1000;
        let gm = GuestMemory::new(&[(GuestAddress(0x0), MEM_SIZE)]).unwrap();
        let mut cmdline = kernel_cmdline::Cmdline::new();
        cmdline.insert_str("1234").unwrap();
        let mut cmdline_address = GuestAddress(45);
        X8664arch::load_cmdline(&gm, cmdline_address, cmdline, CMDLINE_MAX_SIZE as usize - 1)
            .unwrap();
        let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
        assert_eq!(val, b'1');
        cmdline_address = cmdline_address.unchecked_add(1);
        let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
        assert_eq!(val, b'2');
        cmdline_address = cmdline_address.unchecked_add(1);
        let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
        assert_eq!(val, b'3');
        cmdline_address = cmdline_address.unchecked_add(1);
        let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
        assert_eq!(val, b'4');
        cmdline_address = cmdline_address.unchecked_add(1);
        let val: u8 = gm.read_obj_from_addr(cmdline_address).unwrap();
        assert_eq!(val, b'\0');
    }
}