1#![cfg(target_arch = "riscv64")]
8
9use std::collections::BTreeMap;
10use std::io::{self};
11use std::path::PathBuf;
12use std::sync::mpsc;
13use std::sync::Arc;
14
15use arch::get_serial_cmdline;
16use arch::CpuSet;
17use arch::DtbOverlay;
18use arch::FdtPosition;
19use arch::GetSerialCmdlineError;
20use arch::RunnableLinuxVm;
21use arch::VmComponents;
22use arch::VmImage;
23use base::Event;
24use base::SendTube;
25use base::Tube;
26use devices::serial_device::SerialHardware;
27use devices::serial_device::SerialParameters;
28use devices::Bus;
29use devices::BusDeviceObj;
30use devices::BusError;
31use devices::BusType;
32use devices::IrqChipRiscv64;
33use devices::PciAddress;
34use devices::PciConfigMmio;
35use devices::PciDevice;
36use devices::PciRootCommand;
37#[cfg(feature = "gdb")]
38use gdbstub::arch::Arch;
39#[cfg(feature = "gdb")]
40use gdbstub_arch::riscv::Riscv64 as GdbArch;
41use hypervisor::CoreRegister;
42use hypervisor::CpuConfigRiscv64;
43use hypervisor::Hypervisor;
44use hypervisor::ProtectionType;
45use hypervisor::TimerRegister;
46use hypervisor::VcpuInitRiscv64;
47use hypervisor::VcpuRegister;
48use hypervisor::VcpuRiscv64;
49use hypervisor::Vm;
50use hypervisor::VmRiscv64;
51#[cfg(windows)]
52use jail::FakeMinijailStub as Minijail;
53#[cfg(any(target_os = "android", target_os = "linux"))]
54use minijail::Minijail;
55use remain::sorted;
56use resources::AddressRange;
57use resources::SystemAllocator;
58use resources::SystemAllocatorConfig;
59use sync::Condvar;
60use sync::Mutex;
61use thiserror::Error;
62use vm_control::BatteryType;
63use vm_memory::GuestAddress;
64#[cfg(feature = "gdb")]
65use vm_memory::GuestMemory;
66use vm_memory::MemoryRegionOptions;
67
68mod fdt;
69
// Offset of the kernel image from the start of guest RAM (2 MiB); see `get_kernel_addr`.
const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000;
// The initrd load address is rounded up to a multiple of this many bytes.
const RISCV64_INITRD_ALIGN: u64 = 8;
// The FDT offset (relative to the start of guest RAM) is rounded up to a multiple of this (4 MiB).
const RISCV64_FDT_ALIGN: u64 = 0x40_0000;

// Bound on the kernel command line. `build_vm` renders the command line with a maximum length of
// this value minus one (presumably leaving room for a terminating NUL -- TODO confirm).
const RISCV64_CMDLINE_MAX_SIZE: usize = 1024;

// Guest physical address at which guest RAM begins (2 GiB).
const RISCV64_PHYS_MEM_START: u64 = 0x8000_0000;

// Guest physical base address of the PCI configuration MMIO window registered on the MMIO bus.
const RISCV64_PCI_CFG_BASE: u64 = 0x1_0000;
// Size of the PCI configuration MMIO window (16 MiB).
const RISCV64_PCI_CFG_SIZE: u64 = 0x100_0000;
// Base address of the low MMIO range handed to the system allocator.
const RISCV64_MMIO_BASE: u64 = 0x0300_0000;
// Size of the low MMIO range handed to the system allocator (1 MiB).
const RISCV64_MMIO_SIZE: u64 = 0x10_0000;

// Maximum size passed to `fdt::create_fdt` for the generated device tree blob (64 KiB).
const RISCV64_FDT_MAX_SIZE: u64 = 0x1_0000;
91
92fn get_kernel_addr() -> GuestAddress {
93 GuestAddress(RISCV64_PHYS_MEM_START + RISCV64_KERNEL_OFFSET)
94}
95
// First IRQ number made available to the system allocator (`first_irq`).
const RISCV64_IRQ_BASE: u32 = 1;
97
/// Errors returned while configuring or building a riscv64 VM.
///
/// Variants are kept in alphabetical order (enforced by `#[sorted]`); each variant's
/// `#[error]` message describes the failure and, where present, wraps the underlying cause.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("failed to finalize devices: {0}")]
    FinalizeDevices(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("Failed to get the timer base frequency: {0}")]
    GetTimebase(base::Error),
    #[error("Image type not supported on riscv")]
    ImageTypeUnsupported,
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(arch::LoadImageError),
    #[error("PCI mem region not configurable on riscv (yet)")]
    PciMemNotConfigurable,
    #[error("protected vms not supported on riscv (yet)")]
    ProtectedVmUnsupported,
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("Timebase frequency too large")]
    TimebaseTooLarge,
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
}
160
/// Convenience alias for results whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
162
/// Architecture-specific memory layout for riscv64.
///
/// It currently carries no data: `arch_memory_layout` constructs it with no fields.
pub struct ArchMemoryLayout {}
164
/// Marker type on which the riscv64 implementation of `arch::LinuxArch` (and, with the `gdb`
/// feature, `arch::GdbOps`) is provided.
pub struct Riscv64;
166
167impl arch::LinuxArch for Riscv64 {
168 type Error = Error;
169 type ArchMemoryLayout = ArchMemoryLayout;
170
171 fn arch_memory_layout(
172 components: &VmComponents,
173 ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
174 if components.pci_config.mem.is_some() {
175 return Err(Error::PciMemNotConfigurable);
176 }
177 Ok(ArchMemoryLayout {})
178 }
179
180 fn guest_memory_layout(
183 components: &VmComponents,
184 _arch_memory_layout: &Self::ArchMemoryLayout,
185 _hypervisor: &impl Hypervisor,
186 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
187 Ok(vec![(
188 GuestAddress(RISCV64_PHYS_MEM_START),
189 components.memory_size,
190 Default::default(),
191 )])
192 }
193
194 fn get_system_allocator_config<V: Vm>(
195 vm: &V,
196 _arch_memory_layout: &Self::ArchMemoryLayout,
197 ) -> SystemAllocatorConfig {
198 let (high_mmio_base, high_mmio_size) =
199 get_high_mmio_base_size(vm.get_memory().memory_size(), vm.get_guest_phys_addr_bits());
200 SystemAllocatorConfig {
201 io: None,
202 low_mmio: AddressRange::from_start_and_size(RISCV64_MMIO_BASE, RISCV64_MMIO_SIZE)
203 .expect("invalid mmio region"),
204 high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
205 .expect("invalid high mmio region"),
206 platform_mmio: None,
207 first_irq: RISCV64_IRQ_BASE,
208 }
209 }
210
211 fn build_vm<V, Vcpu>(
212 mut components: VmComponents,
213 _arch_memory_layout: &Self::ArchMemoryLayout,
214 _vm_evt_wrtube: &SendTube,
215 system_allocator: &mut SystemAllocator,
216 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
217 serial_jail: Option<Minijail>,
218 (_bat_type, _bat_jail): (Option<BatteryType>, Option<Minijail>),
219 mut vm: V,
220 ramoops_region: Option<arch::pstore::RamoopsRegion>,
221 devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
222 irq_chip: &mut dyn IrqChipRiscv64,
223 vcpu_ids: &mut Vec<usize>,
224 _dump_device_tree_blob: Option<PathBuf>,
225 _debugcon_jail: Option<Minijail>,
226 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
227 _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
228 device_tree_overlays: Vec<DtbOverlay>,
229 fdt_position: Option<FdtPosition>,
230 _no_pmu: bool,
231 ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
232 where
233 V: VmRiscv64,
234 Vcpu: VcpuRiscv64,
235 {
236 if components.hv_cfg.protection_type == ProtectionType::Protected {
237 return Err(Error::ProtectedVmUnsupported);
238 }
239
240 let mem = vm.get_memory().clone();
241
242 let mmio_bus = Arc::new(Bus::new(BusType::Mmio));
243
244 let io_bus = Arc::new(Bus::new(BusType::Io));
246 let hypercall_bus = Arc::new(Bus::new(BusType::Hypercall));
247
248 let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?;
249 let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?;
250 let serial_devices = arch::add_serial_devices(
251 components.hv_cfg.protection_type,
252 &mmio_bus,
253 (0, &com_evt_1_3),
255 (0, &com_evt_2_4),
256 serial_parameters,
257 serial_jail,
258 #[cfg(feature = "swap")]
259 swap_controller,
260 )
261 .map_err(Error::CreateSerialDevices)?;
262
263 let (pci_devices, others): (Vec<_>, Vec<_>) = devices
264 .into_iter()
265 .partition(|(dev, _)| dev.as_pci_device().is_some());
266 let pci_devices = pci_devices
267 .into_iter()
268 .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
269 .collect();
270 let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
271 arch::generate_pci_root(
272 pci_devices,
273 irq_chip.as_irq_chip_mut(),
274 Arc::clone(&mmio_bus),
275 GuestAddress(RISCV64_PCI_CFG_BASE),
276 8,
277 Arc::clone(&io_bus),
278 system_allocator,
279 &mut vm,
280 devices::IMSIC_MAX_INT_IDS as usize,
281 None,
282 #[cfg(feature = "swap")]
283 swap_controller,
284 )
285 .map_err(Error::CreatePciRoot)?;
286
287 let pci_root = Arc::new(Mutex::new(pci));
288 let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
289 let (platform_devices, _others): (Vec<_>, Vec<_>) = others
290 .into_iter()
291 .partition(|(dev, _)| dev.as_platform_device().is_some());
292
293 let platform_devices = platform_devices
294 .into_iter()
295 .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
296 .collect();
297 let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
298 arch::sys::linux::generate_platform_bus(
299 platform_devices,
300 irq_chip.as_irq_chip_mut(),
301 &mmio_bus,
302 system_allocator,
303 &mut vm,
304 #[cfg(feature = "swap")]
305 swap_controller,
306 &mut None,
307 components.hv_cfg.protection_type,
308 )
309 .map_err(Error::CreatePlatformBus)?;
310 pid_debug_label_map.append(&mut platform_pid_debug_label_map);
311
312 let mut cmdline = get_base_linux_cmdline();
313
314 if let Some(ramoops_region) = ramoops_region {
315 arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
316 .map_err(Error::Cmdline)?;
317 }
318
319 mmio_bus
320 .insert(pci_bus, RISCV64_PCI_CFG_BASE, RISCV64_PCI_CFG_SIZE)
321 .map_err(Error::RegisterPci)?;
322
323 get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
324 .map_err(Error::GetSerialCmdline)?;
325 for param in components.extra_kernel_params {
326 cmdline.insert_str(¶m).map_err(Error::Cmdline)?;
327 }
328
329 let (suspend_tube_send, suspend_tube_recv) = Tube::directional_pair().unwrap();
331
332 let initrd: Option<(GuestAddress, u32)>;
335 let kernel_initrd_end = match components.vm_image {
336 VmImage::Bios(ref _bios) => {
337 return Err(Error::ImageTypeUnsupported);
338 }
339 VmImage::Kernel(ref mut kernel_image) => {
340 let kernel_size = arch::load_image(&mem, kernel_image, get_kernel_addr(), u64::MAX)
341 .map_err(Error::KernelLoadFailure)?;
342 let kernel_end = get_kernel_addr().offset() + kernel_size as u64;
343 initrd = match components.initrd_image {
344 Some(initrd_file) => {
345 let mut initrd_file = initrd_file;
346 let initrd_addr =
347 (kernel_end + (RISCV64_INITRD_ALIGN - 1)) & !(RISCV64_INITRD_ALIGN - 1);
348 let initrd_max_size =
349 components.memory_size - (initrd_addr - RISCV64_PHYS_MEM_START);
350 let initrd_addr = GuestAddress(initrd_addr);
351 let initrd_size =
352 arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
353 .map_err(Error::InitrdLoadFailure)?;
354 Some((initrd_addr, initrd_size))
355 }
356 None => None,
357 };
358 if let Some((initrd_addr, initrd_size)) = initrd {
359 initrd_addr.offset() + initrd_size as u64 - RISCV64_PHYS_MEM_START
360 } else {
361 kernel_end - RISCV64_PHYS_MEM_START
362 }
363 }
364 };
365
366 let vcpu_count = components.vcpu_count;
368 let mut vcpus = Vec::with_capacity(vcpu_count);
369 for vcpu_id in 0..vcpu_count {
370 let vcpu: Vcpu = *vm
371 .create_vcpu(vcpu_id)
372 .map_err(Error::CreateVcpu)?
373 .downcast::<Vcpu>()
374 .map_err(|_| Error::DowncastVcpu)?;
375 vcpus.push(vcpu);
376 vcpu_ids.push(vcpu_id);
377 }
378
379 irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
380
381 irq_chip
382 .finalize_devices(system_allocator, &io_bus, &mmio_bus)
383 .map_err(Error::FinalizeDevices)?;
384 let (aia_num_ids, aia_num_sources) = irq_chip.get_num_ids_sources();
385
386 let pci_cfg = fdt::PciConfigRegion {
387 base: RISCV64_PCI_CFG_BASE,
388 size: RISCV64_PCI_CFG_SIZE,
389 };
390
391 let pci_ranges: Vec<fdt::PciRange> = system_allocator
392 .mmio_pools()
393 .iter()
394 .map(|range| fdt::PciRange {
395 space: fdt::PciAddressSpace::Memory64,
396 bus_address: range.start,
397 cpu_physical_address: range.start,
398 size: range.len().unwrap(),
399 prefetchable: false,
400 })
401 .collect();
402
403 assert!(
404 matches!(fdt_position, None | Some(FdtPosition::AfterPayload)),
405 "fdt_position={fdt_position:?} not supported"
406 );
407 let fdt_offset = (kernel_initrd_end + (RISCV64_FDT_ALIGN - 1)) & !(RISCV64_FDT_ALIGN - 1);
408
409 let timebase_freq: u32 = vcpus[0]
410 .get_one_reg(VcpuRegister::Timer(TimerRegister::TimebaseFrequency))
411 .map_err(Error::GetTimebase)?
412 .try_into()
413 .map_err(|_| Error::TimebaseTooLarge)?;
414
415 fdt::create_fdt(
416 RISCV64_FDT_MAX_SIZE as usize,
417 &mem,
418 pci_irqs,
419 pci_cfg,
420 &pci_ranges,
421 dev_resources,
422 components.vcpu_count as u32,
423 fdt_offset,
424 aia_num_ids,
425 aia_num_sources,
426 cmdline
427 .as_str_with_max_len(RISCV64_CMDLINE_MAX_SIZE - 1)
428 .map_err(Error::Cmdline)?,
429 initrd,
430 timebase_freq,
431 device_tree_overlays,
432 )
433 .map_err(Error::CreateFdt)?;
434
435 let vcpu_init = vec![
436 VcpuInitRiscv64::new(GuestAddress(fdt_offset + RISCV64_PHYS_MEM_START));
437 vcpu_count
438 ];
439
440 Ok(RunnableLinuxVm {
441 vm,
442 vcpu_count: components.vcpu_count,
443 vcpus: Some(vcpus),
444 vcpu_init,
445 vcpu_affinity: components.vcpu_affinity,
446 no_smt: false,
447 irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
448 hypercall_bus,
449 io_bus,
450 mmio_bus,
451 pid_debug_label_map,
452 resume_notify_devices: Vec::new(),
453 root_config: pci_root,
454 platform_devices,
455 hotplug_bus: BTreeMap::new(),
456 rt_cpus: components.rt_cpus,
457 delay_rt: components.delay_rt,
458 suspend_tube: (Arc::new(Mutex::new(suspend_tube_send)), suspend_tube_recv),
459 bat_control: None,
460 pm: None,
461 devices_thread: None,
462 vm_request_tubes: Vec::new(),
463 })
464 }
465
466 fn configure_vcpu<V: Vm>(
467 _vm: &V,
468 _hypervisor: &dyn Hypervisor,
469 _irq_chip: &mut dyn IrqChipRiscv64,
470 vcpu: &mut dyn VcpuRiscv64,
471 _vcpu_init: VcpuInitRiscv64,
472 vcpu_id: usize,
473 _num_cpus: usize,
474 cpu_config: Option<CpuConfigRiscv64>,
475 ) -> std::result::Result<(), Self::Error> {
476 vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::Pc), get_kernel_addr().0)
477 .map_err(Self::Error::SetReg)?;
478 vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::A0), vcpu_id as u64)
479 .map_err(Self::Error::SetReg)?;
480 vcpu.set_one_reg(
481 VcpuRegister::Core(CoreRegister::A1),
482 cpu_config.unwrap().fdt_address.0,
483 )
484 .map_err(Self::Error::SetReg)?;
485
486 Ok(())
487 }
488
489 fn register_pci_device<V: VmRiscv64, Vcpu: VcpuRiscv64>(
490 _linux: &mut RunnableLinuxVm<V, Vcpu>,
491 _device: Box<dyn PciDevice>,
492 _minijail: Option<Minijail>,
493 _resources: &mut SystemAllocator,
494 _tube: &mpsc::Sender<PciRootCommand>,
495 #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
496 ) -> std::result::Result<PciAddress, Self::Error> {
497 Err(Error::Unsupported)
499 }
500
501 fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
502 Ok(BTreeMap::new())
503 }
504
505 fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
506 Ok(BTreeMap::new())
507 }
508
509 fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
510 Ok(BTreeMap::new())
511 }
512
513 fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
514 Ok(Vec::new())
515 }
516}
517
/// GDB stub hooks for riscv64.
///
/// None of the operations are implemented yet: every method panics via `unimplemented!()`.
#[cfg(feature = "gdb")]
impl<T: VcpuRiscv64> arch::GdbOps<T> for Riscv64 {
    type Error = Error;

    /// Not implemented on riscv64; panics when called.
    fn read_memory(
        _vcpu: &T,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _len: usize,
    ) -> Result<Vec<u8>> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn write_memory(
        _vcpu: &T,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _buf: &[u8],
    ) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn read_registers(_vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn write_registers(_vcpu: &T, _regs: &<GdbArch as Arch>::Registers) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn read_register(_vcpu: &T, _reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn write_register(_vcpu: &T, _reg_id: <GdbArch as Arch>::RegId, _data: &[u8]) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn enable_singlestep(_vcpu: &T) -> Result<()> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn get_max_hw_breakpoints(_vcpu: &T) -> Result<usize> {
        unimplemented!()
    }

    /// Not implemented on riscv64; panics when called.
    fn set_hw_breakpoints(_vcpu: &T, _breakpoints: &[GuestAddress]) -> Result<()> {
        unimplemented!()
    }
}
568
569fn get_high_mmio_base_size(mem_size: u64, guest_phys_addr_bits: u8) -> (u64, u64) {
570 let guest_phys_end = 1u64 << guest_phys_addr_bits;
571 let high_mmio_base = RISCV64_PHYS_MEM_START + mem_size;
572 let size = guest_phys_end
573 .checked_sub(high_mmio_base)
574 .unwrap_or_else(|| {
575 panic!("guest_phys_end {guest_phys_end:#x} < high_mmio_base {high_mmio_base:#x}",);
576 });
577 (high_mmio_base, size)
578}
579
580fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
581 let mut cmdline = kernel_cmdline::Cmdline::new();
582 cmdline.insert_str("panic=-1").unwrap();
583 cmdline
584}