riscv64/
lib.rs

1// Copyright 2023 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! RISC-V 64-bit architecture support.
6
7#![cfg(target_arch = "riscv64")]
8
9use std::collections::BTreeMap;
10use std::io::{self};
11use std::path::PathBuf;
12use std::sync::mpsc;
13use std::sync::Arc;
14
15use arch::get_serial_cmdline;
16use arch::CpuSet;
17use arch::DtbOverlay;
18use arch::FdtPosition;
19use arch::GetSerialCmdlineError;
20use arch::RunnableLinuxVm;
21use arch::VmComponents;
22use arch::VmImage;
23use base::Event;
24use base::SendTube;
25use base::Tube;
26use devices::serial_device::SerialHardware;
27use devices::serial_device::SerialParameters;
28use devices::Bus;
29use devices::BusDeviceObj;
30use devices::BusError;
31use devices::BusType;
32use devices::IrqChipRiscv64;
33use devices::PciAddress;
34use devices::PciConfigMmio;
35use devices::PciDevice;
36use devices::PciRootCommand;
37#[cfg(feature = "gdb")]
38use gdbstub::arch::Arch;
39#[cfg(feature = "gdb")]
40use gdbstub_arch::riscv::Riscv64 as GdbArch;
41use hypervisor::CoreRegister;
42use hypervisor::CpuConfigRiscv64;
43use hypervisor::Hypervisor;
44use hypervisor::ProtectionType;
45use hypervisor::TimerRegister;
46use hypervisor::VcpuInitRiscv64;
47use hypervisor::VcpuRegister;
48use hypervisor::VcpuRiscv64;
49use hypervisor::Vm;
50use hypervisor::VmRiscv64;
51#[cfg(windows)]
52use jail::FakeMinijailStub as Minijail;
53#[cfg(any(target_os = "android", target_os = "linux"))]
54use minijail::Minijail;
55use remain::sorted;
56use resources::AddressRange;
57use resources::SystemAllocator;
58use resources::SystemAllocatorConfig;
59use sync::Condvar;
60use sync::Mutex;
61use thiserror::Error;
62use vm_control::BatteryType;
63use vm_memory::GuestAddress;
64#[cfg(feature = "gdb")]
65use vm_memory::GuestMemory;
66use vm_memory::MemoryRegionOptions;
67
68mod fdt;
69
// We place the kernel at offset 2MB (0x20_0000) from the start of DRAM.
const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000;
// The initrd load address is rounded up to a multiple of this many bytes.
const RISCV64_INITRD_ALIGN: u64 = 8;
// The FDT offset is rounded up to a multiple of this value (4MB).
const RISCV64_FDT_ALIGN: u64 = 0x40_0000;

// Maximum Linux riscv kernel command line size (arch/riscv/include/uapi/asm/setup.h).
const RISCV64_CMDLINE_MAX_SIZE: usize = 1024;

// This indicates the start of DRAM inside the physical address space (2GB).
const RISCV64_PHYS_MEM_START: u64 = 0x8000_0000;

// PCI MMIO configuration region base address.
const RISCV64_PCI_CFG_BASE: u64 = 0x1_0000;
// PCI MMIO configuration region size (16MB).
const RISCV64_PCI_CFG_SIZE: u64 = 0x100_0000;
// This is the base address of MMIO devices.
const RISCV64_MMIO_BASE: u64 = 0x0300_0000;
// Size of the whole MMIO region (1MB).
const RISCV64_MMIO_SIZE: u64 = 0x10_0000;

// Maximum size of the generated FDT (64KB), passed to `fdt::create_fdt`.
const RISCV64_FDT_MAX_SIZE: u64 = 0x1_0000;
91
92fn get_kernel_addr() -> GuestAddress {
93    GuestAddress(RISCV64_PHYS_MEM_START + RISCV64_KERNEL_OFFSET)
94}
95
// First IRQ number given to the system allocator (`first_irq` in
// `get_system_allocator_config`).
const RISCV64_IRQ_BASE: u32 = 1;
97
/// Errors returned by the riscv64 architecture support code.
///
/// Each variant's `#[error(...)]` attribute is its user-facing message; variants that wrap
/// another error include it as `{0}`.
///
/// The `#[sorted]` attribute (from the `remain` crate) requires the variants to stay in sorted
/// order, so new variants must be inserted at their alphabetical position.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("failed to finalize devices: {0}")]
    FinalizeDevices(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("Failed to get the timer base frequency: {0}")]
    GetTimebase(base::Error),
    #[error("Image type not supported on riscv")]
    ImageTypeUnsupported,
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(arch::LoadImageError),
    #[error("PCI mem region not configurable on riscv (yet)")]
    PciMemNotConfigurable,
    #[error("protected vms not supported on riscv (yet)")]
    ProtectedVmUnsupported,
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("Timebase frequency too large")]
    TimebaseTooLarge,
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
}
160
/// Convenience alias for results whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// Architecture-specific memory layout for riscv64.
///
/// Carries no data at present; it is the `ArchMemoryLayout` associated type of the
/// `arch::LinuxArch` implementation below.
pub struct ArchMemoryLayout {}

/// Marker type on which the riscv64 `arch::LinuxArch` (and, with the `gdb` feature,
/// `arch::GdbOps`) implementations are defined.
pub struct Riscv64;
166
167impl arch::LinuxArch for Riscv64 {
168    type Error = Error;
169    type ArchMemoryLayout = ArchMemoryLayout;
170
171    fn arch_memory_layout(
172        components: &VmComponents,
173    ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
174        if components.pci_config.mem.is_some() {
175            return Err(Error::PciMemNotConfigurable);
176        }
177        Ok(ArchMemoryLayout {})
178    }
179
180    /// Returns a Vec of the valid memory addresses.
181    /// These should be used to configure the GuestMemory structure for the platfrom.
182    fn guest_memory_layout(
183        components: &VmComponents,
184        _arch_memory_layout: &Self::ArchMemoryLayout,
185        _hypervisor: &impl Hypervisor,
186    ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
187        Ok(vec![(
188            GuestAddress(RISCV64_PHYS_MEM_START),
189            components.memory_size,
190            Default::default(),
191        )])
192    }
193
194    fn get_system_allocator_config(
195        vm: &dyn Vm,
196        _arch_memory_layout: &Self::ArchMemoryLayout,
197    ) -> SystemAllocatorConfig {
198        let (high_mmio_base, high_mmio_size) =
199            get_high_mmio_base_size(vm.get_memory().memory_size(), vm.get_guest_phys_addr_bits());
200        SystemAllocatorConfig {
201            io: None,
202            low_mmio: AddressRange::from_start_and_size(RISCV64_MMIO_BASE, RISCV64_MMIO_SIZE)
203                .expect("invalid mmio region"),
204            high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
205                .expect("invalid high mmio region"),
206            platform_mmio: None,
207            first_irq: RISCV64_IRQ_BASE,
208        }
209    }
210
    /// Builds a runnable riscv64 Linux VM from `components`.
    ///
    /// In order, this function:
    /// 1. rejects protected VMs,
    /// 2. creates the MMIO, I/O and hypercall buses and registers the serial devices,
    /// 3. splits `devices` into PCI and platform devices and registers each group,
    /// 4. assembles the kernel command line,
    /// 5. loads the kernel and the optional initrd into guest memory,
    /// 6. creates the vCPUs (recording their ids in `vcpu_ids`) and finalizes the IRQ chip,
    /// 7. calls `fdt::create_fdt` with an FDT offset placed after the loaded payload, and
    /// 8. returns the populated [`RunnableLinuxVm`].
    ///
    /// Parameters whose names start with `_` are accepted for interface compatibility and are
    /// not used by this implementation.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ProtectedVmUnsupported`] for protected VMs,
    /// [`Error::ImageTypeUnsupported`] for BIOS images, and the corresponding [`Error`] variant
    /// when any of the setup steps above fails.
    ///
    /// # Panics
    ///
    /// Panics if `fdt_position` is anything other than `None` or
    /// `Some(FdtPosition::AfterPayload)`, if the suspend tube pair cannot be created, if
    /// `components.vcpu_properties` is empty (the timebase is read from `vcpus[0]`), or if the
    /// length of an MMIO pool range is unavailable.
    fn build_vm(
        mut components: VmComponents,
        _arch_memory_layout: &Self::ArchMemoryLayout,
        _vm_evt_wrtube: &SendTube,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        (_bat_type, _bat_jail): (Option<BatteryType>, Option<Minijail>),
        vm: Arc<dyn VmRiscv64>,
        ramoops_region: Option<arch::pstore::RamoopsRegion>,
        devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: Arc<dyn IrqChipRiscv64>,
        vcpu_ids: &mut Vec<usize>,
        _dump_device_tree_blob: Option<PathBuf>,
        _debugcon_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
        _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
        device_tree_overlays: Vec<DtbOverlay>,
        fdt_position: Option<FdtPosition>,
        _no_pmu: bool,
    ) -> std::result::Result<RunnableLinuxVm, Self::Error> {
        if components.hv_cfg.protection_type == ProtectionType::Protected {
            return Err(Error::ProtectedVmUnsupported);
        }

        let mem = vm.get_memory().clone();

        let mmio_bus = Arc::new(Bus::new(BusType::Mmio));

        // Riscv doesn't really use the io bus like x86, so just create an empty bus.
        let io_bus = Arc::new(Bus::new(BusType::Io));
        let hypercall_bus = Arc::new(Bus::new(BusType::Hypercall));

        // Events handed to the serial devices as their interrupt events.
        let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?;
        let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?;
        let serial_devices = arch::add_serial_devices(
            components.hv_cfg.protection_type,
            &mmio_bus,
            // TODO: the IRQ numbers are bogus since the events aren't actually wired up
            (0, &com_evt_1_3),
            (0, &com_evt_2_4),
            serial_parameters,
            serial_jail,
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(Error::CreateSerialDevices)?;

        // Split the device list: PCI devices are attached to the PCI root, everything else is
        // considered for the platform bus further down.
        let (pci_devices, others): (Vec<_>, Vec<_>) = devices
            .into_iter()
            .partition(|(dev, _)| dev.as_pci_device().is_some());
        // The unwrap cannot fail for this half of the partition: every entry reported itself
        // as a PCI device above.
        let pci_devices = pci_devices
            .into_iter()
            .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
            .collect();
        // NOTE(review): the literal `8` is passed both here and to `PciConfigMmio::new` below;
        // presumably both describe the same config-space address layout — confirm against
        // `arch::generate_pci_root` before changing either.
        let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
            arch::generate_pci_root(
                pci_devices,
                &*irq_chip,
                Arc::clone(&mmio_bus),
                GuestAddress(RISCV64_PCI_CFG_BASE),
                8,
                Arc::clone(&io_bus),
                system_allocator,
                &*vm,
                devices::IMSIC_MAX_INT_IDS as usize,
                None,
                #[cfg(feature = "swap")]
                swap_controller,
            )
            .map_err(Error::CreatePciRoot)?;

        let pci_root = Arc::new(Mutex::new(pci));
        let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
        // Devices that are neither PCI nor platform devices land in `_others`, which is not
        // used any further.
        let (platform_devices, _others): (Vec<_>, Vec<_>) = others
            .into_iter()
            .partition(|(dev, _)| dev.as_platform_device().is_some());

        let platform_devices = platform_devices
            .into_iter()
            .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
            .collect();
        let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
            arch::sys::linux::generate_platform_bus(
                platform_devices,
                &*irq_chip,
                &mmio_bus,
                system_allocator,
                &*vm,
                #[cfg(feature = "swap")]
                swap_controller,
                &mut None,
                components.hv_cfg.protection_type,
            )
            .map_err(Error::CreatePlatformBus)?;
        // Merge the platform devices' debug labels into the map returned for the PCI devices.
        pid_debug_label_map.append(&mut platform_pid_debug_label_map);

        // Assemble the kernel command line: base options, optional ramoops parameters, serial
        // console parameters, then any extra parameters from `components`.
        let mut cmdline = get_base_linux_cmdline();

        if let Some(ramoops_region) = ramoops_region {
            arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
                .map_err(Error::Cmdline)?;
        }

        // Expose the PCI configuration region on the MMIO bus.
        mmio_bus
            .insert(pci_bus, RISCV64_PCI_CFG_BASE, RISCV64_PCI_CFG_SIZE)
            .map_err(Error::RegisterPci)?;

        get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
            .map_err(Error::GetSerialCmdline)?;
        for param in components.extra_kernel_params {
            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
        }

        // Event used by PMDevice to notify crosvm that guest OS is trying to suspend.
        let (suspend_tube_send, suspend_tube_recv) = Tube::directional_pair().unwrap();

        // separate out image loading from other setup to get a specific error for
        // image loading
        //
        // `kernel_initrd_end` is the end of the loaded payload expressed as an offset from
        // `RISCV64_PHYS_MEM_START`, not as an absolute guest address.
        let initrd: Option<(GuestAddress, u32)>;
        let kernel_initrd_end = match components.vm_image {
            VmImage::Bios(ref _bios) => {
                return Err(Error::ImageTypeUnsupported);
            }
            VmImage::Kernel(ref mut kernel_image) => {
                let kernel_size = arch::load_image(&mem, kernel_image, get_kernel_addr(), u64::MAX)
                    .map_err(Error::KernelLoadFailure)?;
                let kernel_end = get_kernel_addr().offset() + kernel_size as u64;
                initrd = match components.initrd_image {
                    Some(initrd_file) => {
                        let mut initrd_file = initrd_file;
                        // Round the end of the kernel up to the next RISCV64_INITRD_ALIGN
                        // boundary; the initrd is loaded there.
                        let initrd_addr =
                            (kernel_end + (RISCV64_INITRD_ALIGN - 1)) & !(RISCV64_INITRD_ALIGN - 1);
                        // The initrd may use at most the guest memory remaining after its
                        // load address.
                        let initrd_max_size =
                            components.memory_size - (initrd_addr - RISCV64_PHYS_MEM_START);
                        let initrd_addr = GuestAddress(initrd_addr);
                        let initrd_size =
                            arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
                                .map_err(Error::InitrdLoadFailure)?;
                        Some((initrd_addr, initrd_size))
                    }
                    None => None,
                };
                if let Some((initrd_addr, initrd_size)) = initrd {
                    initrd_addr.offset() + initrd_size as u64 - RISCV64_PHYS_MEM_START
                } else {
                    kernel_end - RISCV64_PHYS_MEM_START
                }
            }
        };

        // Creates vcpus early as the irqchip needs them created to attach interrupts.
        let vcpu_count = components.vcpu_properties.len();
        let mut vcpus = Vec::with_capacity(vcpu_count);
        for vcpu_id in 0..vcpu_count {
            let vcpu = vm.create_vcpu(vcpu_id).map_err(Error::CreateVcpu)?;
            vcpus.push(vcpu);
            vcpu_ids.push(vcpu_id);
        }

        irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;

        irq_chip
            .clone()
            .finalize_devices(system_allocator, &io_bus, &mmio_bus)
            .map_err(Error::FinalizeDevices)?;
        let (aia_num_ids, aia_num_sources) = irq_chip.get_num_ids_sources();

        let pci_cfg = fdt::PciConfigRegion {
            base: RISCV64_PCI_CFG_BASE,
            size: RISCV64_PCI_CFG_SIZE,
        };

        // Describe every allocator MMIO pool as an identity-mapped (bus address == CPU
        // physical address), non-prefetchable 64-bit memory range.
        let pci_ranges: Vec<fdt::PciRange> = system_allocator
            .mmio_pools()
            .iter()
            .map(|range| fdt::PciRange {
                space: fdt::PciAddressSpace::Memory64,
                bus_address: range.start,
                cpu_physical_address: range.start,
                size: range.len().unwrap(),
                prefetchable: false,
            })
            .collect();

        // Only placing the FDT after the payload is implemented.
        assert!(
            matches!(fdt_position, None | Some(FdtPosition::AfterPayload)),
            "fdt_position={fdt_position:?} not supported"
        );
        // Offset (from the start of DRAM) of the FDT: the end of the payload rounded up to the
        // next RISCV64_FDT_ALIGN boundary.
        let fdt_offset = (kernel_initrd_end + (RISCV64_FDT_ALIGN - 1)) & !(RISCV64_FDT_ALIGN - 1);

        // The timebase frequency is read from the first vCPU and must fit in a `u32`.
        let timebase_freq: u32 = vcpus[0]
            .get_one_reg(VcpuRegister::Timer(TimerRegister::TimebaseFrequency))
            .map_err(Error::GetTimebase)?
            .try_into()
            .map_err(|_| Error::TimebaseTooLarge)?;

        fdt::create_fdt(
            RISCV64_FDT_MAX_SIZE as usize,
            &mem,
            pci_irqs,
            pci_cfg,
            &pci_ranges,
            dev_resources,
            components.vcpu_properties.len() as u32,
            fdt_offset,
            aia_num_ids,
            aia_num_sources,
            cmdline
                .as_str_with_max_len(RISCV64_CMDLINE_MAX_SIZE - 1)
                .map_err(Error::Cmdline)?,
            initrd,
            timebase_freq,
            device_tree_overlays,
        )
        .map_err(Error::CreateFdt)?;

        // Every vCPU gets the same init data: the absolute guest address of the FDT.
        let vcpu_init = vec![
            VcpuInitRiscv64::new(GuestAddress(fdt_offset + RISCV64_PHYS_MEM_START));
            vcpu_count
        ];

        Ok(RunnableLinuxVm {
            vm,
            vcpu_count: components.vcpu_properties.len(),
            vcpus: Some(vcpus),
            vcpu_init,
            vcpu_affinity: components.vcpu_affinity,
            no_smt: false,
            irq_chip,
            hypercall_bus,
            io_bus,
            mmio_bus,
            pid_debug_label_map,
            resume_notify_devices: Vec::new(),
            root_config: pci_root,
            platform_devices,
            hotplug_bus: BTreeMap::new(),
            rt_cpus: components.rt_cpus,
            delay_rt: components.delay_rt,
            suspend_tube: (Arc::new(Mutex::new(suspend_tube_send)), suspend_tube_recv),
            bat_control: None,
            pm: None,
            devices_thread: None,
            vm_request_tubes: Vec::new(),
        })
    }
458
459    fn configure_vcpu(
460        _vm: &dyn Vm,
461        _hypervisor: &dyn Hypervisor,
462        _irq_chip: &dyn IrqChipRiscv64,
463        vcpu: &dyn VcpuRiscv64,
464        _vcpu_init: VcpuInitRiscv64,
465        vcpu_id: usize,
466        _num_vcpus: usize,
467        cpu_config: Option<CpuConfigRiscv64>,
468    ) -> std::result::Result<(), Self::Error> {
469        vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::Pc), get_kernel_addr().0)
470            .map_err(Self::Error::SetReg)?;
471        vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::A0), vcpu_id as u64)
472            .map_err(Self::Error::SetReg)?;
473        vcpu.set_one_reg(
474            VcpuRegister::Core(CoreRegister::A1),
475            cpu_config.unwrap().fdt_address.0,
476        )
477        .map_err(Self::Error::SetReg)?;
478
479        Ok(())
480    }
481
    /// PCI device hotplug entry point.
    ///
    /// Not supported on riscv64: every argument is ignored and the call always fails.
    ///
    /// # Errors
    ///
    /// Always returns [`Error::Unsupported`].
    fn register_pci_device(
        _linux: &mut RunnableLinuxVm,
        _device: Box<dyn PciDevice>,
        _minijail: Option<Minijail>,
        _resources: &mut SystemAllocator,
        _tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
    ) -> std::result::Result<PciAddress, Self::Error> {
        // hotplug function isn't verified on Riscv64, so set it unsupported here.
        Err(Error::Unsupported)
    }
493
494    fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
495        Ok(BTreeMap::new())
496    }
497
498    fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
499        Ok(BTreeMap::new())
500    }
501
502    fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
503        Ok(BTreeMap::new())
504    }
505
506    fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
507        Ok(Vec::new())
508    }
509}
510
/// GDB stub hooks for riscv64 (only built with the `gdb` feature).
///
/// None of the operations are implemented yet: every method below panics via
/// `unimplemented!()` and ignores its arguments.
#[cfg(feature = "gdb")]
impl arch::GdbOps for Riscv64 {
    type Error = Error;

    /// Not implemented; always panics.
    fn read_memory(
        _vcpu: &dyn VcpuRiscv64,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _len: usize,
    ) -> Result<Vec<u8>> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn write_memory(
        _vcpu: &dyn VcpuRiscv64,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _buf: &[u8],
    ) -> Result<()> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn read_registers(_vcpu: &dyn VcpuRiscv64) -> Result<<GdbArch as Arch>::Registers> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn write_registers(
        _vcpu: &dyn VcpuRiscv64,
        _regs: &<GdbArch as Arch>::Registers,
    ) -> Result<()> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn read_register(
        _vcpu: &dyn VcpuRiscv64,
        _reg_id: <GdbArch as Arch>::RegId,
    ) -> Result<Vec<u8>> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn write_register(
        _vcpu: &dyn VcpuRiscv64,
        _reg_id: <GdbArch as Arch>::RegId,
        _data: &[u8],
    ) -> Result<()> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn enable_singlestep(_vcpu: &dyn VcpuRiscv64) -> Result<()> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn get_max_hw_breakpoints(_vcpu: &dyn VcpuRiscv64) -> Result<usize> {
        unimplemented!();
    }

    /// Not implemented; always panics.
    fn set_hw_breakpoints(_vcpu: &dyn VcpuRiscv64, _breakpoints: &[GuestAddress]) -> Result<()> {
        unimplemented!();
    }
}
571
572fn get_high_mmio_base_size(mem_size: u64, guest_phys_addr_bits: u8) -> (u64, u64) {
573    let guest_phys_end = 1u64 << guest_phys_addr_bits;
574    let high_mmio_base = RISCV64_PHYS_MEM_START + mem_size;
575    let size = guest_phys_end
576        .checked_sub(high_mmio_base)
577        .unwrap_or_else(|| {
578            panic!("guest_phys_end {guest_phys_end:#x} < high_mmio_base {high_mmio_base:#x}",);
579        });
580    (high_mmio_base, size)
581}
582
583fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
584    let mut cmdline = kernel_cmdline::Cmdline::new();
585    cmdline.insert_str("panic=-1").unwrap();
586    cmdline
587}