riscv64/
lib.rs

1// Copyright 2023 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! RISC-V 64-bit architecture support.
6
7#![cfg(target_arch = "riscv64")]
8
9use std::collections::BTreeMap;
10use std::io::{self};
11use std::path::PathBuf;
12use std::sync::mpsc;
13use std::sync::Arc;
14
15use arch::get_serial_cmdline;
16use arch::CpuSet;
17use arch::DtbOverlay;
18use arch::FdtPosition;
19use arch::GetSerialCmdlineError;
20use arch::RunnableLinuxVm;
21use arch::VmComponents;
22use arch::VmImage;
23use base::Event;
24use base::SendTube;
25use base::Tube;
26use devices::serial_device::SerialHardware;
27use devices::serial_device::SerialParameters;
28use devices::Bus;
29use devices::BusDeviceObj;
30use devices::BusError;
31use devices::BusType;
32use devices::IrqChipRiscv64;
33use devices::PciAddress;
34use devices::PciConfigMmio;
35use devices::PciDevice;
36use devices::PciRootCommand;
37#[cfg(feature = "gdb")]
38use gdbstub::arch::Arch;
39#[cfg(feature = "gdb")]
40use gdbstub_arch::riscv::Riscv64 as GdbArch;
41use hypervisor::CoreRegister;
42use hypervisor::CpuConfigRiscv64;
43use hypervisor::Hypervisor;
44use hypervisor::ProtectionType;
45use hypervisor::TimerRegister;
46use hypervisor::VcpuInitRiscv64;
47use hypervisor::VcpuRegister;
48use hypervisor::VcpuRiscv64;
49use hypervisor::Vm;
50use hypervisor::VmRiscv64;
51#[cfg(windows)]
52use jail::FakeMinijailStub as Minijail;
53#[cfg(any(target_os = "android", target_os = "linux"))]
54use minijail::Minijail;
55use remain::sorted;
56use resources::AddressRange;
57use resources::SystemAllocator;
58use resources::SystemAllocatorConfig;
59use sync::Condvar;
60use sync::Mutex;
61use thiserror::Error;
62use vm_control::BatteryType;
63use vm_memory::GuestAddress;
64#[cfg(feature = "gdb")]
65use vm_memory::GuestMemory;
66use vm_memory::MemoryRegionOptions;
67
68mod fdt;
69
// The kernel is loaded 2 MiB (0x20_0000) above the start of DRAM.
const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000;
// The initrd load address is rounded up to a multiple of this many bytes.
const RISCV64_INITRD_ALIGN: u64 = 8;
// The FDT offset is rounded up to a multiple of this many bytes (4 MiB).
const RISCV64_FDT_ALIGN: u64 = 0x40_0000;

// Maximum Linux riscv kernel command line size (arch/riscv/include/uapi/asm/setup.h).
const RISCV64_CMDLINE_MAX_SIZE: usize = 1024;

// This indicates the start of DRAM inside the physical address space.
const RISCV64_PHYS_MEM_START: u64 = 0x8000_0000;

// PCI MMIO configuration region base address.
const RISCV64_PCI_CFG_BASE: u64 = 0x1_0000;
// PCI MMIO configuration region size (16 MiB).
const RISCV64_PCI_CFG_SIZE: u64 = 0x100_0000;
// This is the base address of MMIO devices.
const RISCV64_MMIO_BASE: u64 = 0x0300_0000;
// Size of the whole MMIO region (1 MiB).
const RISCV64_MMIO_SIZE: u64 = 0x10_0000;

// Maximum size, in bytes, handed to `fdt::create_fdt` for the device tree blob (64 KiB).
const RISCV64_FDT_MAX_SIZE: u64 = 0x1_0000;
91
92fn get_kernel_addr() -> GuestAddress {
93    GuestAddress(RISCV64_PHYS_MEM_START + RISCV64_KERNEL_OFFSET)
94}
95
// First interrupt number given to the system allocator (`first_irq` in its config).
const RISCV64_IRQ_BASE: u32 = 1;
97
#[sorted]
#[derive(Error, Debug)]
/// Errors reported by the riscv64 architecture support code.
///
/// This is the `Error` type of the `arch::LinuxArch` implementation for [`Riscv64`].
/// Variants are kept in alphabetical order, as enforced by `#[sorted]`.
pub enum Error {
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("failed to finalize devices: {0}")]
    FinalizeDevices(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("Failed to get the timer base frequency: {0}")]
    GetTimebase(base::Error),
    #[error("Image type not supported on riscv")]
    ImageTypeUnsupported,
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(arch::LoadImageError),
    #[error("PCI mem region not configurable on riscv (yet)")]
    PciMemNotConfigurable,
    #[error("protected vms not supported on riscv (yet)")]
    ProtectedVmUnsupported,
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("Timebase frequency too large")]
    TimebaseTooLarge,
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
}
160
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
162
/// Architecture-specific memory layout description; it carries no data on riscv64.
pub struct ArchMemoryLayout {}
164
/// Unit type on which the riscv64 `arch::LinuxArch` implementation is defined.
pub struct Riscv64;
166
167impl arch::LinuxArch for Riscv64 {
168    type Error = Error;
169    type ArchMemoryLayout = ArchMemoryLayout;
170
171    fn arch_memory_layout(
172        components: &VmComponents,
173    ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
174        if components.pci_config.mem.is_some() {
175            return Err(Error::PciMemNotConfigurable);
176        }
177        Ok(ArchMemoryLayout {})
178    }
179
180    /// Returns a Vec of the valid memory addresses.
181    /// These should be used to configure the GuestMemory structure for the platfrom.
182    fn guest_memory_layout(
183        components: &VmComponents,
184        _arch_memory_layout: &Self::ArchMemoryLayout,
185        _hypervisor: &impl Hypervisor,
186    ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
187        Ok(vec![(
188            GuestAddress(RISCV64_PHYS_MEM_START),
189            components.memory_size,
190            Default::default(),
191        )])
192    }
193
194    fn get_system_allocator_config<V: Vm>(
195        vm: &V,
196        _arch_memory_layout: &Self::ArchMemoryLayout,
197    ) -> SystemAllocatorConfig {
198        let (high_mmio_base, high_mmio_size) =
199            get_high_mmio_base_size(vm.get_memory().memory_size(), vm.get_guest_phys_addr_bits());
200        SystemAllocatorConfig {
201            io: None,
202            low_mmio: AddressRange::from_start_and_size(RISCV64_MMIO_BASE, RISCV64_MMIO_SIZE)
203                .expect("invalid mmio region"),
204            high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
205                .expect("invalid high mmio region"),
206            platform_mmio: None,
207            first_irq: RISCV64_IRQ_BASE,
208        }
209    }
210
    /// Assembles a runnable riscv64 Linux VM from `components`.
    ///
    /// In order, this function:
    /// 1. rejects protected VMs,
    /// 2. creates the MMIO bus (and an empty I/O bus) and adds the serial devices,
    /// 3. splits `devices` into PCI and platform devices and registers both groups,
    /// 4. builds the kernel command line (base options, ramoops, serial, extra parameters),
    /// 5. loads the kernel at `get_kernel_addr()` and the optional initrd right after it,
    /// 6. creates the vCPUs, then finalizes the IRQ chip and its devices,
    /// 7. writes the FDT after the loaded payload and records its address for every vCPU.
    ///
    /// Parameters whose names start with `_` are accepted for interface compatibility but are
    /// not used here. Each created vCPU id is appended to `vcpu_ids`.
    ///
    /// # Errors
    ///
    /// Returns the [`Error`] variant matching the step that failed, e.g.
    /// [`Error::ProtectedVmUnsupported`], [`Error::ImageTypeUnsupported`] for BIOS images,
    /// [`Error::KernelLoadFailure`], [`Error::InitrdLoadFailure`], [`Error::CreateVcpu`],
    /// [`Error::TimebaseTooLarge`] or [`Error::CreateFdt`].
    ///
    /// # Panics
    ///
    /// Panics if `fdt_position` is anything other than `None` or
    /// `Some(FdtPosition::AfterPayload)`, if the suspend tube pair cannot be created, if an
    /// MMIO pool has no representable length, or if `components.vcpu_count` is zero (the
    /// timebase frequency is read from `vcpus[0]`).
    fn build_vm<V, Vcpu>(
        mut components: VmComponents,
        _arch_memory_layout: &Self::ArchMemoryLayout,
        _vm_evt_wrtube: &SendTube,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        (_bat_type, _bat_jail): (Option<BatteryType>, Option<Minijail>),
        mut vm: V,
        ramoops_region: Option<arch::pstore::RamoopsRegion>,
        devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: &mut dyn IrqChipRiscv64,
        vcpu_ids: &mut Vec<usize>,
        _dump_device_tree_blob: Option<PathBuf>,
        _debugcon_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
        _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
        device_tree_overlays: Vec<DtbOverlay>,
        fdt_position: Option<FdtPosition>,
        _no_pmu: bool,
    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
    where
        V: VmRiscv64,
        Vcpu: VcpuRiscv64,
    {
        if components.hv_cfg.protection_type == ProtectionType::Protected {
            return Err(Error::ProtectedVmUnsupported);
        }

        let mem = vm.get_memory().clone();

        let mmio_bus = Arc::new(Bus::new(BusType::Mmio));

        // Riscv doesn't really use the io bus like x86, so just create an empty bus.
        let io_bus = Arc::new(Bus::new(BusType::Io));

        let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?;
        let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?;
        let serial_devices = arch::add_serial_devices(
            components.hv_cfg.protection_type,
            &mmio_bus,
            // TODO: the IRQ numbers are bogus since the events aren't actually wired up
            (0, &com_evt_1_3),
            (0, &com_evt_2_4),
            serial_parameters,
            serial_jail,
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(Error::CreateSerialDevices)?;

        // Split off the PCI devices; everything else is examined again below.
        let (pci_devices, others): (Vec<_>, Vec<_>) = devices
            .into_iter()
            .partition(|(dev, _)| dev.as_pci_device().is_some());
        // The unwrap cannot fail: the partition above kept only devices that are PCI devices.
        let pci_devices = pci_devices
            .into_iter()
            .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
            .collect();
        let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
            arch::generate_pci_root(
                pci_devices,
                irq_chip.as_irq_chip_mut(),
                Arc::clone(&mmio_bus),
                GuestAddress(RISCV64_PCI_CFG_BASE),
                8,
                Arc::clone(&io_bus),
                system_allocator,
                &mut vm,
                devices::IMSIC_MAX_INT_IDS as usize,
                None,
                #[cfg(feature = "swap")]
                swap_controller,
            )
            .map_err(Error::CreatePciRoot)?;

        let pci_root = Arc::new(Mutex::new(pci));
        let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
        // Devices that are neither PCI nor platform devices land in `_others`, which is not
        // used any further in this function.
        let (platform_devices, _others): (Vec<_>, Vec<_>) = others
            .into_iter()
            .partition(|(dev, _)| dev.as_platform_device().is_some());

        // The unwrap cannot fail: the partition above kept only platform devices.
        let platform_devices = platform_devices
            .into_iter()
            .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
            .collect();
        let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
            arch::sys::linux::generate_platform_bus(
                platform_devices,
                irq_chip.as_irq_chip_mut(),
                &mmio_bus,
                system_allocator,
                &mut vm,
                #[cfg(feature = "swap")]
                swap_controller,
                components.hv_cfg.protection_type,
            )
            .map_err(Error::CreatePlatformBus)?;
        // Merge the platform devices' debug labels into the PCI devices' map.
        pid_debug_label_map.append(&mut platform_pid_debug_label_map);

        let mut cmdline = get_base_linux_cmdline();

        if let Some(ramoops_region) = ramoops_region {
            arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
                .map_err(Error::Cmdline)?;
        }

        // Expose the PCI configuration space on the MMIO bus.
        mmio_bus
            .insert(pci_bus, RISCV64_PCI_CFG_BASE, RISCV64_PCI_CFG_SIZE)
            .map_err(Error::RegisterPci)?;

        get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
            .map_err(Error::GetSerialCmdline)?;
        for param in components.extra_kernel_params {
            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
        }

        // Event used by PMDevice to notify crosvm that guest OS is trying to suspend.
        let (suspend_tube_send, suspend_tube_recv) = Tube::directional_pair().unwrap();

        // separate out image loading from other setup to get a specific error for
        // image loading
        let initrd;
        // Offset, relative to RISCV64_PHYS_MEM_START, of the first byte after the loaded
        // payload (the initrd when one is given, otherwise the kernel).
        let kernel_initrd_end = match components.vm_image {
            VmImage::Bios(ref _bios) => {
                return Err(Error::ImageTypeUnsupported);
            }
            VmImage::Kernel(ref mut kernel_image) => {
                let kernel_size = arch::load_image(&mem, kernel_image, get_kernel_addr(), u64::MAX)
                    .map_err(Error::KernelLoadFailure)?;
                let kernel_end = get_kernel_addr().offset() + kernel_size as u64;
                initrd = match components.initrd_image {
                    Some(initrd_file) => {
                        let mut initrd_file = initrd_file;
                        // Round the end of the kernel up to RISCV64_INITRD_ALIGN.
                        let initrd_addr =
                            (kernel_end + (RISCV64_INITRD_ALIGN - 1)) & !(RISCV64_INITRD_ALIGN - 1);
                        // The initrd may use whatever guest RAM remains after its load address.
                        let initrd_max_size =
                            components.memory_size - (initrd_addr - RISCV64_PHYS_MEM_START);
                        let initrd_addr = GuestAddress(initrd_addr);
                        let initrd_size =
                            arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
                                .map_err(Error::InitrdLoadFailure)?;
                        Some((initrd_addr, initrd_size))
                    }
                    None => None,
                };
                if let Some((initrd_addr, initrd_size)) = initrd {
                    initrd_addr.offset() + initrd_size as u64 - RISCV64_PHYS_MEM_START
                } else {
                    kernel_end - RISCV64_PHYS_MEM_START
                }
            }
        };

        // Creates vcpus early as the irqchip needs them created to attach interrupts.
        let vcpu_count = components.vcpu_count;
        let mut vcpus = Vec::with_capacity(vcpu_count);
        for vcpu_id in 0..vcpu_count {
            let vcpu: Vcpu = *vm
                .create_vcpu(vcpu_id)
                .map_err(Error::CreateVcpu)?
                .downcast::<Vcpu>()
                .map_err(|_| Error::DowncastVcpu)?;
            vcpus.push(vcpu);
            vcpu_ids.push(vcpu_id);
        }

        irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;

        irq_chip
            .finalize_devices(system_allocator, &io_bus, &mmio_bus)
            .map_err(Error::FinalizeDevices)?;
        let (aia_num_ids, aia_num_sources) = irq_chip.get_num_ids_sources();

        let pci_cfg = fdt::PciConfigRegion {
            base: RISCV64_PCI_CFG_BASE,
            size: RISCV64_PCI_CFG_SIZE,
        };

        // Describe every allocator MMIO pool as an identity-mapped, non-prefetchable
        // 64-bit PCI memory range.
        let pci_ranges: Vec<fdt::PciRange> = system_allocator
            .mmio_pools()
            .iter()
            .map(|range| fdt::PciRange {
                space: fdt::PciAddressSpace::Memory64,
                bus_address: range.start,
                cpu_physical_address: range.start,
                size: range.len().unwrap(),
                prefetchable: false,
            })
            .collect();

        assert!(
            matches!(fdt_position, None | Some(FdtPosition::AfterPayload)),
            "fdt_position={fdt_position:?} not supported"
        );
        // Place the FDT at the next RISCV64_FDT_ALIGN boundary after the payload; like
        // `kernel_initrd_end`, this is an offset from RISCV64_PHYS_MEM_START.
        let fdt_offset = (kernel_initrd_end + (RISCV64_FDT_ALIGN - 1)) & !(RISCV64_FDT_ALIGN - 1);

        // The timebase frequency is read from the first vCPU and must fit in a u32.
        let timebase_freq: u32 = vcpus[0]
            .get_one_reg(VcpuRegister::Timer(TimerRegister::TimebaseFrequency))
            .map_err(Error::GetTimebase)?
            .try_into()
            .map_err(|_| Error::TimebaseTooLarge)?;

        fdt::create_fdt(
            RISCV64_FDT_MAX_SIZE as usize,
            &mem,
            pci_irqs,
            pci_cfg,
            &pci_ranges,
            dev_resources,
            components.vcpu_count as u32,
            fdt_offset,
            aia_num_ids,
            aia_num_sources,
            cmdline
                .as_str_with_max_len(RISCV64_CMDLINE_MAX_SIZE - 1)
                .map_err(Error::Cmdline)?,
            initrd,
            timebase_freq,
            device_tree_overlays,
        )
        .map_err(Error::CreateFdt)?;

        // Every vCPU is initialized with the same absolute guest address of the FDT.
        let vcpu_init = vec![
            VcpuInitRiscv64::new(GuestAddress(fdt_offset + RISCV64_PHYS_MEM_START));
            vcpu_count
        ];

        Ok(RunnableLinuxVm {
            vm,
            vcpu_count: components.vcpu_count,
            vcpus: Some(vcpus),
            vcpu_init,
            vcpu_affinity: components.vcpu_affinity,
            no_smt: false,
            irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
            io_bus,
            mmio_bus,
            pid_debug_label_map,
            resume_notify_devices: Vec::new(),
            root_config: pci_root,
            platform_devices,
            hotplug_bus: BTreeMap::new(),
            rt_cpus: components.rt_cpus,
            delay_rt: components.delay_rt,
            suspend_tube: (Arc::new(Mutex::new(suspend_tube_send)), suspend_tube_recv),
            bat_control: None,
            pm: None,
            devices_thread: None,
            vm_request_tubes: Vec::new(),
        })
    }
462
463    fn configure_vcpu<V: Vm>(
464        _vm: &V,
465        _hypervisor: &dyn Hypervisor,
466        _irq_chip: &mut dyn IrqChipRiscv64,
467        vcpu: &mut dyn VcpuRiscv64,
468        _vcpu_init: VcpuInitRiscv64,
469        vcpu_id: usize,
470        _num_cpus: usize,
471        cpu_config: Option<CpuConfigRiscv64>,
472    ) -> std::result::Result<(), Self::Error> {
473        vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::Pc), get_kernel_addr().0)
474            .map_err(Self::Error::SetReg)?;
475        vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::A0), vcpu_id as u64)
476            .map_err(Self::Error::SetReg)?;
477        vcpu.set_one_reg(
478            VcpuRegister::Core(CoreRegister::A1),
479            cpu_config.unwrap().fdt_address.0,
480        )
481        .map_err(Self::Error::SetReg)?;
482
483        Ok(())
484    }
485
486    fn register_pci_device<V: VmRiscv64, Vcpu: VcpuRiscv64>(
487        _linux: &mut RunnableLinuxVm<V, Vcpu>,
488        _device: Box<dyn PciDevice>,
489        _minijail: Option<Minijail>,
490        _resources: &mut SystemAllocator,
491        _tube: &mpsc::Sender<PciRootCommand>,
492        #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
493    ) -> std::result::Result<PciAddress, Self::Error> {
494        // hotplug function isn't verified on Riscv64, so set it unsupported here.
495        Err(Error::Unsupported)
496    }
497
498    fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
499        Ok(BTreeMap::new())
500    }
501
502    fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
503        Ok(BTreeMap::new())
504    }
505
506    fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
507        Ok(BTreeMap::new())
508    }
509
510    fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
511        Ok(Vec::new())
512    }
513}
514
/// GDB stub hooks for riscv64.
///
/// Debugger support has not been implemented for this architecture. Every hook reports
/// [`Error::Unsupported`] rather than panicking, so a debugger request cannot take down the
/// whole VMM; none of the arguments are inspected.
#[cfg(feature = "gdb")]
impl<T: VcpuRiscv64> arch::GdbOps<T> for Riscv64 {
    type Error = Error;

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn read_memory(
        _vcpu: &T,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _len: usize,
    ) -> Result<Vec<u8>> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn write_memory(
        _vcpu: &T,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _buf: &[u8],
    ) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn read_registers(_vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn write_registers(_vcpu: &T, _regs: &<GdbArch as Arch>::Registers) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn read_register(_vcpu: &T, _reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn write_register(_vcpu: &T, _reg_id: <GdbArch as Arch>::RegId, _data: &[u8]) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn enable_singlestep(_vcpu: &T) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn get_max_hw_breakpoints(_vcpu: &T) -> Result<usize> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn set_hw_breakpoints(_vcpu: &T, _breakpoints: &[GuestAddress]) -> Result<()> {
        Err(Error::Unsupported)
    }
}
565
566fn get_high_mmio_base_size(mem_size: u64, guest_phys_addr_bits: u8) -> (u64, u64) {
567    let guest_phys_end = 1u64 << guest_phys_addr_bits;
568    let high_mmio_base = RISCV64_PHYS_MEM_START + mem_size;
569    let size = guest_phys_end
570        .checked_sub(high_mmio_base)
571        .unwrap_or_else(|| {
572            panic!("guest_phys_end {guest_phys_end:#x} < high_mmio_base {high_mmio_base:#x}",);
573        });
574    (high_mmio_base, size)
575}
576
577fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
578    let mut cmdline = kernel_cmdline::Cmdline::new();
579    cmdline.insert_str("panic=-1").unwrap();
580    cmdline
581}