riscv64/
lib.rs

1// Copyright 2023 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! RISC-V 64-bit architecture support.
6
7#![cfg(target_arch = "riscv64")]
8
9use std::collections::BTreeMap;
10use std::io::{self};
11use std::path::PathBuf;
12use std::sync::mpsc;
13use std::sync::Arc;
14
15use arch::get_serial_cmdline;
16use arch::CpuSet;
17use arch::DtbOverlay;
18use arch::FdtPosition;
19use arch::GetSerialCmdlineError;
20use arch::RunnableLinuxVm;
21use arch::VmComponents;
22use arch::VmImage;
23use base::Event;
24use base::SendTube;
25use base::Tube;
26use devices::serial_device::SerialHardware;
27use devices::serial_device::SerialParameters;
28use devices::Bus;
29use devices::BusDeviceObj;
30use devices::BusError;
31use devices::BusType;
32use devices::IrqChipRiscv64;
33use devices::PciAddress;
34use devices::PciConfigMmio;
35use devices::PciDevice;
36use devices::PciRootCommand;
37#[cfg(feature = "gdb")]
38use gdbstub::arch::Arch;
39#[cfg(feature = "gdb")]
40use gdbstub_arch::riscv::Riscv64 as GdbArch;
41use hypervisor::CoreRegister;
42use hypervisor::CpuConfigRiscv64;
43use hypervisor::Hypervisor;
44use hypervisor::ProtectionType;
45use hypervisor::TimerRegister;
46use hypervisor::VcpuInitRiscv64;
47use hypervisor::VcpuRegister;
48use hypervisor::VcpuRiscv64;
49use hypervisor::Vm;
50use hypervisor::VmRiscv64;
51#[cfg(windows)]
52use jail::FakeMinijailStub as Minijail;
53#[cfg(any(target_os = "android", target_os = "linux"))]
54use minijail::Minijail;
55use remain::sorted;
56use resources::AddressRange;
57use resources::SystemAllocator;
58use resources::SystemAllocatorConfig;
59use sync::Condvar;
60use sync::Mutex;
61use thiserror::Error;
62use vm_control::BatteryType;
63use vm_memory::GuestAddress;
64#[cfg(feature = "gdb")]
65use vm_memory::GuestMemory;
66use vm_memory::MemoryRegionOptions;
67
68mod fdt;
69
70// We place the kernel at offset 2MB (0x20_0000) from the start of DRAM; see `get_kernel_addr`.
71const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000;
// Alignment (in bytes) that the initrd load address is rounded up to after the kernel image.
72const RISCV64_INITRD_ALIGN: u64 = 8;
// Alignment (4MB) that the FDT offset is rounded up to after the kernel/initrd payload.
73const RISCV64_FDT_ALIGN: u64 = 0x40_0000;
74
75// Maximum Linux riscv kernel command line size (arch/riscv/include/uapi/asm/setup.h).
76const RISCV64_CMDLINE_MAX_SIZE: usize = 1024;
77
78// This indicates the start of DRAM inside the physical address space.
79const RISCV64_PHYS_MEM_START: u64 = 0x8000_0000;
80
81// PCI MMIO configuration region base address.
82const RISCV64_PCI_CFG_BASE: u64 = 0x1_0000;
83// PCI MMIO configuration region size.
84const RISCV64_PCI_CFG_SIZE: u64 = 0x100_0000;
85// This is the base address of MMIO devices.
86const RISCV64_MMIO_BASE: u64 = 0x0300_0000;
87// Size of the whole MMIO region.
88const RISCV64_MMIO_SIZE: u64 = 0x10_0000;
89
// Maximum size (64KB) passed to `fdt::create_fdt` for the generated device tree blob.
90const RISCV64_FDT_MAX_SIZE: u64 = 0x1_0000;
91
92fn get_kernel_addr() -> GuestAddress {
93    GuestAddress(RISCV64_PHYS_MEM_START + RISCV64_KERNEL_OFFSET)
94}
95
// First IRQ number given to the system allocator (`first_irq` in `get_system_allocator_config`).
96const RISCV64_IRQ_BASE: u32 = 1;
97
/// Errors produced by the RISC-V 64-bit architecture support code.
///
/// This is the `Error` type of the `arch::LinuxArch` implementation for `Riscv64`. Variants that
/// wrap a value carry the underlying error from the failing operation and print it as part of
/// their message.
///
/// The `#[sorted]` attribute (from the `remain` crate) requires the variants to stay in sorted
/// order, so new variants must be inserted at their alphabetical position.
98#[sorted]
99#[derive(Error, Debug)]
100pub enum Error {
101    #[error("unable to clone an Event: {0}")]
102    CloneEvent(base::Error),
103    #[error("failed to clone IRQ chip: {0}")]
104    CloneIrqChip(base::Error),
105    #[error("the given kernel command line was invalid: {0}")]
106    Cmdline(kernel_cmdline::Error),
107    #[error("unable to make an Event: {0}")]
108    CreateEvent(base::Error),
109    #[error("FDT could not be created: {0}")]
110    CreateFdt(cros_fdt::Error),
111    #[error("failed to create a PCI root hub: {0}")]
112    CreatePciRoot(arch::DeviceRegistrationError),
113    #[error("failed to create platform bus: {0}")]
114    CreatePlatformBus(arch::DeviceRegistrationError),
115    #[error("unable to create serial devices: {0}")]
116    CreateSerialDevices(arch::DeviceRegistrationError),
117    #[error("failed to create socket: {0}")]
118    CreateSocket(io::Error),
119    #[error("failed to create VCPU: {0}")]
120    CreateVcpu(base::Error),
121    #[error("vm created wrong kind of vcpu")]
122    DowncastVcpu,
123    #[error("failed to finalize devices: {0}")]
124    FinalizeDevices(base::Error),
125    #[error("failed to finalize IRQ chip: {0}")]
126    FinalizeIrqChip(base::Error),
127    #[error("failed to get serial cmdline: {0}")]
128    GetSerialCmdline(GetSerialCmdlineError),
129    #[error("Failed to get the timer base frequency: {0}")]
130    GetTimebase(base::Error),
131    #[error("Image type not supported on riscv")]
132    ImageTypeUnsupported,
133    #[error("initrd could not be loaded: {0}")]
134    InitrdLoadFailure(arch::LoadImageError),
135    #[error("kernel could not be loaded: {0}")]
136    KernelLoadFailure(arch::LoadImageError),
137    #[error("PCI mem region not configurable on riscv (yet)")]
138    PciMemNotConfigurable,
139    #[error("protected vms not supported on riscv (yet)")]
140    ProtectedVmUnsupported,
141    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
142    RamoopsAddress(u64, u64),
143    #[error("failed to register irq fd: {0}")]
144    RegisterIrqfd(base::Error),
145    #[error("error registering PCI bus: {0}")]
146    RegisterPci(BusError),
147    #[error("error registering virtual socket device: {0}")]
148    RegisterVsock(arch::DeviceRegistrationError),
149    #[error("failed to set device attr: {0}")]
150    SetDeviceAttr(base::Error),
151    #[error("failed to set register: {0}")]
152    SetReg(base::Error),
153    #[error("Timebase frequency too large")]
154    TimebaseTooLarge,
155    #[error("this function isn't supported")]
156    Unsupported,
157    #[error("failed to initialize VCPU: {0}")]
158    VcpuInit(base::Error),
159}
160
/// Convenience alias for results whose error type is this file's [`Error`].
161pub type Result<T> = std::result::Result<T, Error>;
162
/// Architecture-specific memory layout for RISC-V 64-bit guests.
///
/// The struct has no fields; it is the `ArchMemoryLayout` associated type of the
/// `arch::LinuxArch` implementation for `Riscv64`.
163pub struct ArchMemoryLayout {}
164
/// Marker type that carries the RISC-V 64-bit implementation of `arch::LinuxArch`.
165pub struct Riscv64;
166
167impl arch::LinuxArch for Riscv64 {
168    type Error = Error;
169    type ArchMemoryLayout = ArchMemoryLayout;
170
171    fn arch_memory_layout(
172        components: &VmComponents,
173    ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
174        if components.pci_config.mem.is_some() {
175            return Err(Error::PciMemNotConfigurable);
176        }
177        Ok(ArchMemoryLayout {})
178    }
179
180    /// Returns a Vec of the valid memory addresses.
181    /// These should be used to configure the GuestMemory structure for the platfrom.
182    fn guest_memory_layout(
183        components: &VmComponents,
184        _arch_memory_layout: &Self::ArchMemoryLayout,
185        _hypervisor: &impl Hypervisor,
186    ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
187        Ok(vec![(
188            GuestAddress(RISCV64_PHYS_MEM_START),
189            components.memory_size,
190            Default::default(),
191        )])
192    }
193
194    fn get_system_allocator_config<V: Vm>(
195        vm: &V,
196        _arch_memory_layout: &Self::ArchMemoryLayout,
197    ) -> SystemAllocatorConfig {
198        let (high_mmio_base, high_mmio_size) =
199            get_high_mmio_base_size(vm.get_memory().memory_size(), vm.get_guest_phys_addr_bits());
200        SystemAllocatorConfig {
201            io: None,
202            low_mmio: AddressRange::from_start_and_size(RISCV64_MMIO_BASE, RISCV64_MMIO_SIZE)
203                .expect("invalid mmio region"),
204            high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
205                .expect("invalid high mmio region"),
206            platform_mmio: None,
207            first_irq: RISCV64_IRQ_BASE,
208        }
209    }
210
    /// Assembles a runnable RISC-V Linux VM from `components`.
    ///
    /// In order, this function:
    /// 1. rejects protected VMs,
    /// 2. creates the MMIO, I/O and hypercall buses and registers the serial devices,
    /// 3. splits `devices` into PCI and platform devices and builds the PCI root and the
    ///    platform bus,
    /// 4. builds the kernel command line (base options, ramoops, serial, extra parameters),
    /// 5. loads the kernel and the optional initrd into guest memory,
    /// 6. creates all vcpus and then finalizes the IRQ chip,
    /// 7. writes the FDT after the loaded payload, and
    /// 8. returns the populated `RunnableLinuxVm`.
    ///
    /// # Errors
    ///
    /// Returns `Error::ProtectedVmUnsupported` for protected VMs and
    /// `Error::ImageTypeUnsupported` when `components.vm_image` is a BIOS image. Failures of
    /// the individual setup steps are mapped to the matching `Error` variant.
    ///
    /// # Panics
    ///
    /// Panics if `fdt_position` is anything other than `None` or
    /// `Some(FdtPosition::AfterPayload)`, if the suspend tube pair cannot be created, if the
    /// length of an MMIO pool range is unavailable, or if `components.vcpu_count` is zero
    /// (`vcpus[0]` is indexed to read the timebase frequency).
211    fn build_vm<V, Vcpu>(
212        mut components: VmComponents,
213        _arch_memory_layout: &Self::ArchMemoryLayout,
214        _vm_evt_wrtube: &SendTube,
215        system_allocator: &mut SystemAllocator,
216        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
217        serial_jail: Option<Minijail>,
218        (_bat_type, _bat_jail): (Option<BatteryType>, Option<Minijail>),
219        mut vm: V,
220        ramoops_region: Option<arch::pstore::RamoopsRegion>,
221        devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
222        irq_chip: &mut dyn IrqChipRiscv64,
223        vcpu_ids: &mut Vec<usize>,
224        _dump_device_tree_blob: Option<PathBuf>,
225        _debugcon_jail: Option<Minijail>,
226        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
227        _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
228        device_tree_overlays: Vec<DtbOverlay>,
229        fdt_position: Option<FdtPosition>,
230        _no_pmu: bool,
231    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
232    where
233        V: VmRiscv64,
234        Vcpu: VcpuRiscv64,
235    {
236        if components.hv_cfg.protection_type == ProtectionType::Protected {
237            return Err(Error::ProtectedVmUnsupported);
238        }
239
240        let mem = vm.get_memory().clone();
241
242        let mmio_bus = Arc::new(Bus::new(BusType::Mmio));
243
244        // Riscv doesn't really use the io bus like x86, so just create an empty bus.
245        let io_bus = Arc::new(Bus::new(BusType::Io));
246        let hypercall_bus = Arc::new(Bus::new(BusType::Hypercall));
247
248        let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?;
249        let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?;
250        let serial_devices = arch::add_serial_devices(
251            components.hv_cfg.protection_type,
252            &mmio_bus,
253            // TODO: the IRQ numbers are bogus since the events aren't actually wired up
254            (0, &com_evt_1_3),
255            (0, &com_evt_2_4),
256            serial_parameters,
257            serial_jail,
258            #[cfg(feature = "swap")]
259            swap_controller,
260        )
261        .map_err(Error::CreateSerialDevices)?;
262
        // Split the device list: PCI devices go to the PCI root, the rest is examined below.
263        let (pci_devices, others): (Vec<_>, Vec<_>) = devices
264            .into_iter()
265            .partition(|(dev, _)| dev.as_pci_device().is_some());
        // The unwrap cannot fail: the partition above kept only devices for which
        // `as_pci_device()` returned `Some`.
266        let pci_devices = pci_devices
267            .into_iter()
268            .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
269            .collect();
270        let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
271            arch::generate_pci_root(
272                pci_devices,
273                irq_chip.as_irq_chip_mut(),
274                Arc::clone(&mmio_bus),
275                GuestAddress(RISCV64_PCI_CFG_BASE),
276                8,
277                Arc::clone(&io_bus),
278                system_allocator,
279                &mut vm,
280                devices::IMSIC_MAX_INT_IDS as usize,
281                None,
282                #[cfg(feature = "swap")]
283                swap_controller,
284            )
285            .map_err(Error::CreatePciRoot)?;
286
287        let pci_root = Arc::new(Mutex::new(pci));
288        let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
        // Of the non-PCI devices only platform devices are used; `_others` (devices that are
        // neither PCI nor platform devices) is not referenced again in this function.
289        let (platform_devices, _others): (Vec<_>, Vec<_>) = others
290            .into_iter()
291            .partition(|(dev, _)| dev.as_platform_device().is_some());
292
293        let platform_devices = platform_devices
294            .into_iter()
295            .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
296            .collect();
297        let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
298            arch::sys::linux::generate_platform_bus(
299                platform_devices,
300                irq_chip.as_irq_chip_mut(),
301                &mmio_bus,
302                system_allocator,
303                &mut vm,
304                #[cfg(feature = "swap")]
305                swap_controller,
306                &mut None,
307                components.hv_cfg.protection_type,
308            )
309            .map_err(Error::CreatePlatformBus)?;
310        pid_debug_label_map.append(&mut platform_pid_debug_label_map);
311
312        let mut cmdline = get_base_linux_cmdline();
313
314        if let Some(ramoops_region) = ramoops_region {
315            arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
316                .map_err(Error::Cmdline)?;
317        }
318
        // Expose the PCI configuration space on the MMIO bus at its fixed base address.
319        mmio_bus
320            .insert(pci_bus, RISCV64_PCI_CFG_BASE, RISCV64_PCI_CFG_SIZE)
321            .map_err(Error::RegisterPci)?;
322
323        get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
324            .map_err(Error::GetSerialCmdline)?;
325        for param in components.extra_kernel_params {
326            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
327        }
328
329        // Event used by PMDevice to notify crosvm that guest OS is trying to suspend.
330        let (suspend_tube_send, suspend_tube_recv) = Tube::directional_pair().unwrap();
331
332        // separate out image loading from other setup to get a specific error for
333        // image loading
334        let initrd: Option<(GuestAddress, u32)>;
        // `kernel_initrd_end` is the end of the loaded payload (kernel, or initrd when present)
        // expressed as an offset from `RISCV64_PHYS_MEM_START`.
335        let kernel_initrd_end = match components.vm_image {
336            VmImage::Bios(ref _bios) => {
337                return Err(Error::ImageTypeUnsupported);
338            }
339            VmImage::Kernel(ref mut kernel_image) => {
340                let kernel_size = arch::load_image(&mem, kernel_image, get_kernel_addr(), u64::MAX)
341                    .map_err(Error::KernelLoadFailure)?;
342                let kernel_end = get_kernel_addr().offset() + kernel_size as u64;
343                initrd = match components.initrd_image {
344                    Some(initrd_file) => {
345                        let mut initrd_file = initrd_file;
                        // Round the end of the kernel up to the initrd alignment.
346                        let initrd_addr =
347                            (kernel_end + (RISCV64_INITRD_ALIGN - 1)) & !(RISCV64_INITRD_ALIGN - 1);
                        // The initrd may use whatever guest memory remains after `initrd_addr`.
348                        let initrd_max_size =
349                            components.memory_size - (initrd_addr - RISCV64_PHYS_MEM_START);
350                        let initrd_addr = GuestAddress(initrd_addr);
351                        let initrd_size =
352                            arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
353                                .map_err(Error::InitrdLoadFailure)?;
354                        Some((initrd_addr, initrd_size))
355                    }
356                    None => None,
357                };
358                if let Some((initrd_addr, initrd_size)) = initrd {
359                    initrd_addr.offset() + initrd_size as u64 - RISCV64_PHYS_MEM_START
360                } else {
361                    kernel_end - RISCV64_PHYS_MEM_START
362                }
363            }
364        };
365
366        // Creates vcpus early as the irqchip needs them created to attach interrupts.
367        let vcpu_count = components.vcpu_count;
368        let mut vcpus = Vec::with_capacity(vcpu_count);
369        for vcpu_id in 0..vcpu_count {
370            let vcpu: Vcpu = *vm
371                .create_vcpu(vcpu_id)
372                .map_err(Error::CreateVcpu)?
373                .downcast::<Vcpu>()
374                .map_err(|_| Error::DowncastVcpu)?;
375            vcpus.push(vcpu);
376            vcpu_ids.push(vcpu_id);
377        }
378
379        irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
380
381        irq_chip
382            .finalize_devices(system_allocator, &io_bus, &mmio_bus)
383            .map_err(Error::FinalizeDevices)?;
384        let (aia_num_ids, aia_num_sources) = irq_chip.get_num_ids_sources();
385
386        let pci_cfg = fdt::PciConfigRegion {
387            base: RISCV64_PCI_CFG_BASE,
388            size: RISCV64_PCI_CFG_SIZE,
389        };
390
        // Every allocator MMIO pool is described as a non-prefetchable 64-bit memory range
        // whose PCI bus address equals its CPU physical address.
391        let pci_ranges: Vec<fdt::PciRange> = system_allocator
392            .mmio_pools()
393            .iter()
394            .map(|range| fdt::PciRange {
395                space: fdt::PciAddressSpace::Memory64,
396                bus_address: range.start,
397                cpu_physical_address: range.start,
398                size: range.len().unwrap(),
399                prefetchable: false,
400            })
401            .collect();
402
403        assert!(
404            matches!(fdt_position, None | Some(FdtPosition::AfterPayload)),
405            "fdt_position={fdt_position:?} not supported"
406        );
        // The FDT goes after the payload, rounded up to `RISCV64_FDT_ALIGN`. Like
        // `kernel_initrd_end`, `fdt_offset` is relative to `RISCV64_PHYS_MEM_START`.
407        let fdt_offset = (kernel_initrd_end + (RISCV64_FDT_ALIGN - 1)) & !(RISCV64_FDT_ALIGN - 1);
408
        // The timebase frequency is read from the first vcpu and must fit in a u32.
409        let timebase_freq: u32 = vcpus[0]
410            .get_one_reg(VcpuRegister::Timer(TimerRegister::TimebaseFrequency))
411            .map_err(Error::GetTimebase)?
412            .try_into()
413            .map_err(|_| Error::TimebaseTooLarge)?;
414
415        fdt::create_fdt(
416            RISCV64_FDT_MAX_SIZE as usize,
417            &mem,
418            pci_irqs,
419            pci_cfg,
420            &pci_ranges,
421            dev_resources,
422            components.vcpu_count as u32,
423            fdt_offset,
424            aia_num_ids,
425            aia_num_sources,
426            cmdline
427                .as_str_with_max_len(RISCV64_CMDLINE_MAX_SIZE - 1)
428                .map_err(Error::Cmdline)?,
429            initrd,
430            timebase_freq,
431            device_tree_overlays,
432        )
433        .map_err(Error::CreateFdt)?;
434
        // Every vcpu gets the same init data: the guest physical address of the FDT.
435        let vcpu_init = vec![
436            VcpuInitRiscv64::new(GuestAddress(fdt_offset + RISCV64_PHYS_MEM_START));
437            vcpu_count
438        ];
439
440        Ok(RunnableLinuxVm {
441            vm,
442            vcpu_count: components.vcpu_count,
443            vcpus: Some(vcpus),
444            vcpu_init,
445            vcpu_affinity: components.vcpu_affinity,
446            no_smt: false,
447            irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
448            hypercall_bus,
449            io_bus,
450            mmio_bus,
451            pid_debug_label_map,
452            resume_notify_devices: Vec::new(),
453            root_config: pci_root,
454            platform_devices,
455            hotplug_bus: BTreeMap::new(),
456            rt_cpus: components.rt_cpus,
457            delay_rt: components.delay_rt,
458            suspend_tube: (Arc::new(Mutex::new(suspend_tube_send)), suspend_tube_recv),
459            bat_control: None,
460            pm: None,
461            devices_thread: None,
462            vm_request_tubes: Vec::new(),
463        })
464    }
465
466    fn configure_vcpu<V: Vm>(
467        _vm: &V,
468        _hypervisor: &dyn Hypervisor,
469        _irq_chip: &mut dyn IrqChipRiscv64,
470        vcpu: &mut dyn VcpuRiscv64,
471        _vcpu_init: VcpuInitRiscv64,
472        vcpu_id: usize,
473        _num_cpus: usize,
474        cpu_config: Option<CpuConfigRiscv64>,
475    ) -> std::result::Result<(), Self::Error> {
476        vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::Pc), get_kernel_addr().0)
477            .map_err(Self::Error::SetReg)?;
478        vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::A0), vcpu_id as u64)
479            .map_err(Self::Error::SetReg)?;
480        vcpu.set_one_reg(
481            VcpuRegister::Core(CoreRegister::A1),
482            cpu_config.unwrap().fdt_address.0,
483        )
484        .map_err(Self::Error::SetReg)?;
485
486        Ok(())
487    }
488
489    fn register_pci_device<V: VmRiscv64, Vcpu: VcpuRiscv64>(
490        _linux: &mut RunnableLinuxVm<V, Vcpu>,
491        _device: Box<dyn PciDevice>,
492        _minijail: Option<Minijail>,
493        _resources: &mut SystemAllocator,
494        _tube: &mpsc::Sender<PciRootCommand>,
495        #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
496    ) -> std::result::Result<PciAddress, Self::Error> {
497        // hotplug function isn't verified on Riscv64, so set it unsupported here.
498        Err(Error::Unsupported)
499    }
500
501    fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
502        Ok(BTreeMap::new())
503    }
504
505    fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
506        Ok(BTreeMap::new())
507    }
508
509    fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
510        Ok(BTreeMap::new())
511    }
512
513    fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
514        Ok(Vec::new())
515    }
516}
517
/// GDB stub hooks for RISC-V 64-bit guests.
///
/// None of the operations are implemented: every method panics via `unimplemented!`.
#[cfg(feature = "gdb")]
impl<T> arch::GdbOps<T> for Riscv64
where
    T: VcpuRiscv64,
{
    type Error = Error;

    /// Not implemented; always panics.
    fn read_memory(
        _vcpu: &T,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _len: usize,
    ) -> Result<Vec<u8>> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn write_memory(
        _vcpu: &T,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _buf: &[u8],
    ) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn read_registers(_vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn write_registers(_vcpu: &T, _regs: &<GdbArch as Arch>::Registers) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn read_register(_vcpu: &T, _reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn write_register(_vcpu: &T, _reg_id: <GdbArch as Arch>::RegId, _data: &[u8]) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn enable_singlestep(_vcpu: &T) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn get_max_hw_breakpoints(_vcpu: &T) -> Result<usize> {
        unimplemented!()
    }

    /// Not implemented; always panics.
    fn set_hw_breakpoints(_vcpu: &T, _breakpoints: &[GuestAddress]) -> Result<()> {
        unimplemented!()
    }
}
568
569fn get_high_mmio_base_size(mem_size: u64, guest_phys_addr_bits: u8) -> (u64, u64) {
570    let guest_phys_end = 1u64 << guest_phys_addr_bits;
571    let high_mmio_base = RISCV64_PHYS_MEM_START + mem_size;
572    let size = guest_phys_end
573        .checked_sub(high_mmio_base)
574        .unwrap_or_else(|| {
575            panic!("guest_phys_end {guest_phys_end:#x} < high_mmio_base {high_mmio_base:#x}",);
576        });
577    (high_mmio_base, size)
578}
579
580fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
581    let mut cmdline = kernel_cmdline::Cmdline::new();
582    cmdline.insert_str("panic=-1").unwrap();
583    cmdline
584}