1#![cfg(target_arch = "riscv64")]
8
9use std::collections::BTreeMap;
10use std::io::{self};
11use std::path::PathBuf;
12use std::sync::mpsc;
13use std::sync::Arc;
14
15use arch::get_serial_cmdline;
16use arch::CpuSet;
17use arch::DtbOverlay;
18use arch::FdtPosition;
19use arch::GetSerialCmdlineError;
20use arch::RunnableLinuxVm;
21use arch::VmComponents;
22use arch::VmImage;
23use base::Event;
24use base::SendTube;
25use base::Tube;
26use devices::serial_device::SerialHardware;
27use devices::serial_device::SerialParameters;
28use devices::Bus;
29use devices::BusDeviceObj;
30use devices::BusError;
31use devices::BusType;
32use devices::IrqChipRiscv64;
33use devices::PciAddress;
34use devices::PciConfigMmio;
35use devices::PciDevice;
36use devices::PciRootCommand;
37#[cfg(feature = "gdb")]
38use gdbstub::arch::Arch;
39#[cfg(feature = "gdb")]
40use gdbstub_arch::riscv::Riscv64 as GdbArch;
41use hypervisor::CoreRegister;
42use hypervisor::CpuConfigRiscv64;
43use hypervisor::Hypervisor;
44use hypervisor::ProtectionType;
45use hypervisor::TimerRegister;
46use hypervisor::VcpuInitRiscv64;
47use hypervisor::VcpuRegister;
48use hypervisor::VcpuRiscv64;
49use hypervisor::Vm;
50use hypervisor::VmRiscv64;
51#[cfg(windows)]
52use jail::FakeMinijailStub as Minijail;
53#[cfg(any(target_os = "android", target_os = "linux"))]
54use minijail::Minijail;
55use remain::sorted;
56use resources::AddressRange;
57use resources::SystemAllocator;
58use resources::SystemAllocatorConfig;
59use sync::Condvar;
60use sync::Mutex;
61use thiserror::Error;
62use vm_control::BatteryType;
63use vm_memory::GuestAddress;
64#[cfg(feature = "gdb")]
65use vm_memory::GuestMemory;
66use vm_memory::MemoryRegionOptions;
67
68mod fdt;
69
/// Offset from the start of guest RAM at which the kernel image is loaded
/// (added to `RISCV64_PHYS_MEM_START` by `get_kernel_addr`).
const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000;
/// Alignment, in bytes, that `build_vm` applies to the initrd load address.
const RISCV64_INITRD_ALIGN: u64 = 8;
/// Alignment that `build_vm` applies to the FDT offset placed after the kernel/initrd payload.
const RISCV64_FDT_ALIGN: u64 = 0x40_0000;

/// Kernel command line size limit; `build_vm` caps the command line string at this value
/// minus one.
const RISCV64_CMDLINE_MAX_SIZE: usize = 1024;

/// Guest physical address at which guest RAM starts (see `guest_memory_layout`).
const RISCV64_PHYS_MEM_START: u64 = 0x8000_0000;

/// Guest physical base address of the PCI configuration MMIO region.
const RISCV64_PCI_CFG_BASE: u64 = 0x1_0000;
/// Size of the PCI configuration MMIO region registered on the MMIO bus.
const RISCV64_PCI_CFG_SIZE: u64 = 0x100_0000;
/// Base address of the low MMIO range handed to the system allocator.
const RISCV64_MMIO_BASE: u64 = 0x0300_0000;
/// Size of the low MMIO range handed to the system allocator.
const RISCV64_MMIO_SIZE: u64 = 0x10_0000;

/// Size limit passed to `fdt::create_fdt` for the generated device tree
/// (presumably a maximum blob size in bytes — confirm against `fdt::create_fdt`).
const RISCV64_FDT_MAX_SIZE: u64 = 0x1_0000;
91
92fn get_kernel_addr() -> GuestAddress {
93 GuestAddress(RISCV64_PHYS_MEM_START + RISCV64_KERNEL_OFFSET)
94}
95
/// First IRQ number given to the system allocator (`first_irq` in
/// `get_system_allocator_config`).
const RISCV64_IRQ_BASE: u32 = 1;
97
/// Errors produced while building or configuring a riscv64 VM.
///
/// Most variants wrap the underlying error of the step that failed; the `#[error]` message on
/// each variant is its user-facing description.
// `#[sorted]` (from the `remain` crate) checks that the variants stay in sorted order, so new
// variants must be inserted at their alphabetical position.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("failed to finalize devices: {0}")]
    FinalizeDevices(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("Failed to get the timer base frequency: {0}")]
    GetTimebase(base::Error),
    #[error("Image type not supported on riscv")]
    ImageTypeUnsupported,
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(arch::LoadImageError),
    #[error("PCI mem region not configurable on riscv (yet)")]
    PciMemNotConfigurable,
    #[error("protected vms not supported on riscv (yet)")]
    ProtectedVmUnsupported,
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("Timebase frequency too large")]
    TimebaseTooLarge,
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
}
160
/// Shorthand for a `std::result::Result` whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
162
/// Architecture-specific memory layout for riscv64; it currently carries no data.
pub struct ArchMemoryLayout {}
164
/// Marker type on which the riscv64 `arch::LinuxArch` implementation (and, with the `gdb`
/// feature, `arch::GdbOps`) is provided.
pub struct Riscv64;
166
167impl arch::LinuxArch for Riscv64 {
168 type Error = Error;
169 type ArchMemoryLayout = ArchMemoryLayout;
170
171 fn arch_memory_layout(
172 components: &VmComponents,
173 ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
174 if components.pci_config.mem.is_some() {
175 return Err(Error::PciMemNotConfigurable);
176 }
177 Ok(ArchMemoryLayout {})
178 }
179
180 fn guest_memory_layout(
183 components: &VmComponents,
184 _arch_memory_layout: &Self::ArchMemoryLayout,
185 _hypervisor: &impl Hypervisor,
186 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
187 Ok(vec![(
188 GuestAddress(RISCV64_PHYS_MEM_START),
189 components.memory_size,
190 Default::default(),
191 )])
192 }
193
194 fn get_system_allocator_config(
195 vm: &dyn Vm,
196 _arch_memory_layout: &Self::ArchMemoryLayout,
197 ) -> SystemAllocatorConfig {
198 let (high_mmio_base, high_mmio_size) =
199 get_high_mmio_base_size(vm.get_memory().memory_size(), vm.get_guest_phys_addr_bits());
200 SystemAllocatorConfig {
201 io: None,
202 low_mmio: AddressRange::from_start_and_size(RISCV64_MMIO_BASE, RISCV64_MMIO_SIZE)
203 .expect("invalid mmio region"),
204 high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
205 .expect("invalid high mmio region"),
206 platform_mmio: None,
207 first_irq: RISCV64_IRQ_BASE,
208 }
209 }
210
211 fn build_vm(
212 mut components: VmComponents,
213 _arch_memory_layout: &Self::ArchMemoryLayout,
214 _vm_evt_wrtube: &SendTube,
215 system_allocator: &mut SystemAllocator,
216 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
217 serial_jail: Option<Minijail>,
218 (_bat_type, _bat_jail): (Option<BatteryType>, Option<Minijail>),
219 vm: Arc<dyn VmRiscv64>,
220 ramoops_region: Option<arch::pstore::RamoopsRegion>,
221 devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
222 irq_chip: Arc<dyn IrqChipRiscv64>,
223 vcpu_ids: &mut Vec<usize>,
224 _dump_device_tree_blob: Option<PathBuf>,
225 _debugcon_jail: Option<Minijail>,
226 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
227 _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
228 device_tree_overlays: Vec<DtbOverlay>,
229 fdt_position: Option<FdtPosition>,
230 _no_pmu: bool,
231 ) -> std::result::Result<RunnableLinuxVm, Self::Error> {
232 if components.hv_cfg.protection_type == ProtectionType::Protected {
233 return Err(Error::ProtectedVmUnsupported);
234 }
235
236 let mem = vm.get_memory().clone();
237
238 let mmio_bus = Arc::new(Bus::new(BusType::Mmio));
239
240 let io_bus = Arc::new(Bus::new(BusType::Io));
242 let hypercall_bus = Arc::new(Bus::new(BusType::Hypercall));
243
244 let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?;
245 let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?;
246 let serial_devices = arch::add_serial_devices(
247 components.hv_cfg.protection_type,
248 &mmio_bus,
249 (0, &com_evt_1_3),
251 (0, &com_evt_2_4),
252 serial_parameters,
253 serial_jail,
254 #[cfg(feature = "swap")]
255 swap_controller,
256 )
257 .map_err(Error::CreateSerialDevices)?;
258
259 let (pci_devices, others): (Vec<_>, Vec<_>) = devices
260 .into_iter()
261 .partition(|(dev, _)| dev.as_pci_device().is_some());
262 let pci_devices = pci_devices
263 .into_iter()
264 .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
265 .collect();
266 let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
267 arch::generate_pci_root(
268 pci_devices,
269 &*irq_chip,
270 Arc::clone(&mmio_bus),
271 GuestAddress(RISCV64_PCI_CFG_BASE),
272 8,
273 Arc::clone(&io_bus),
274 system_allocator,
275 &*vm,
276 devices::IMSIC_MAX_INT_IDS as usize,
277 None,
278 #[cfg(feature = "swap")]
279 swap_controller,
280 )
281 .map_err(Error::CreatePciRoot)?;
282
283 let pci_root = Arc::new(Mutex::new(pci));
284 let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
285 let (platform_devices, _others): (Vec<_>, Vec<_>) = others
286 .into_iter()
287 .partition(|(dev, _)| dev.as_platform_device().is_some());
288
289 let platform_devices = platform_devices
290 .into_iter()
291 .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
292 .collect();
293 let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
294 arch::sys::linux::generate_platform_bus(
295 platform_devices,
296 &*irq_chip,
297 &mmio_bus,
298 system_allocator,
299 &*vm,
300 #[cfg(feature = "swap")]
301 swap_controller,
302 &mut None,
303 components.hv_cfg.protection_type,
304 )
305 .map_err(Error::CreatePlatformBus)?;
306 pid_debug_label_map.append(&mut platform_pid_debug_label_map);
307
308 let mut cmdline = get_base_linux_cmdline();
309
310 if let Some(ramoops_region) = ramoops_region {
311 arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
312 .map_err(Error::Cmdline)?;
313 }
314
315 mmio_bus
316 .insert(pci_bus, RISCV64_PCI_CFG_BASE, RISCV64_PCI_CFG_SIZE)
317 .map_err(Error::RegisterPci)?;
318
319 get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
320 .map_err(Error::GetSerialCmdline)?;
321 for param in components.extra_kernel_params {
322 cmdline.insert_str(¶m).map_err(Error::Cmdline)?;
323 }
324
325 let (suspend_tube_send, suspend_tube_recv) = Tube::directional_pair().unwrap();
327
328 let initrd: Option<(GuestAddress, u32)>;
331 let kernel_initrd_end = match components.vm_image {
332 VmImage::Bios(ref _bios) => {
333 return Err(Error::ImageTypeUnsupported);
334 }
335 VmImage::Kernel(ref mut kernel_image) => {
336 let kernel_size = arch::load_image(&mem, kernel_image, get_kernel_addr(), u64::MAX)
337 .map_err(Error::KernelLoadFailure)?;
338 let kernel_end = get_kernel_addr().offset() + kernel_size as u64;
339 initrd = match components.initrd_image {
340 Some(initrd_file) => {
341 let mut initrd_file = initrd_file;
342 let initrd_addr =
343 (kernel_end + (RISCV64_INITRD_ALIGN - 1)) & !(RISCV64_INITRD_ALIGN - 1);
344 let initrd_max_size =
345 components.memory_size - (initrd_addr - RISCV64_PHYS_MEM_START);
346 let initrd_addr = GuestAddress(initrd_addr);
347 let initrd_size =
348 arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
349 .map_err(Error::InitrdLoadFailure)?;
350 Some((initrd_addr, initrd_size))
351 }
352 None => None,
353 };
354 if let Some((initrd_addr, initrd_size)) = initrd {
355 initrd_addr.offset() + initrd_size as u64 - RISCV64_PHYS_MEM_START
356 } else {
357 kernel_end - RISCV64_PHYS_MEM_START
358 }
359 }
360 };
361
362 let vcpu_count = components.vcpu_properties.len();
364 let mut vcpus = Vec::with_capacity(vcpu_count);
365 for vcpu_id in 0..vcpu_count {
366 let vcpu = vm.create_vcpu(vcpu_id).map_err(Error::CreateVcpu)?;
367 vcpus.push(vcpu);
368 vcpu_ids.push(vcpu_id);
369 }
370
371 irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
372
373 irq_chip
374 .clone()
375 .finalize_devices(system_allocator, &io_bus, &mmio_bus)
376 .map_err(Error::FinalizeDevices)?;
377 let (aia_num_ids, aia_num_sources) = irq_chip.get_num_ids_sources();
378
379 let pci_cfg = fdt::PciConfigRegion {
380 base: RISCV64_PCI_CFG_BASE,
381 size: RISCV64_PCI_CFG_SIZE,
382 };
383
384 let pci_ranges: Vec<fdt::PciRange> = system_allocator
385 .mmio_pools()
386 .iter()
387 .map(|range| fdt::PciRange {
388 space: fdt::PciAddressSpace::Memory64,
389 bus_address: range.start,
390 cpu_physical_address: range.start,
391 size: range.len().unwrap(),
392 prefetchable: false,
393 })
394 .collect();
395
396 assert!(
397 matches!(fdt_position, None | Some(FdtPosition::AfterPayload)),
398 "fdt_position={fdt_position:?} not supported"
399 );
400 let fdt_offset = (kernel_initrd_end + (RISCV64_FDT_ALIGN - 1)) & !(RISCV64_FDT_ALIGN - 1);
401
402 let timebase_freq: u32 = vcpus[0]
403 .get_one_reg(VcpuRegister::Timer(TimerRegister::TimebaseFrequency))
404 .map_err(Error::GetTimebase)?
405 .try_into()
406 .map_err(|_| Error::TimebaseTooLarge)?;
407
408 fdt::create_fdt(
409 RISCV64_FDT_MAX_SIZE as usize,
410 &mem,
411 pci_irqs,
412 pci_cfg,
413 &pci_ranges,
414 dev_resources,
415 components.vcpu_properties.len() as u32,
416 fdt_offset,
417 aia_num_ids,
418 aia_num_sources,
419 cmdline
420 .as_str_with_max_len(RISCV64_CMDLINE_MAX_SIZE - 1)
421 .map_err(Error::Cmdline)?,
422 initrd,
423 timebase_freq,
424 device_tree_overlays,
425 )
426 .map_err(Error::CreateFdt)?;
427
428 let vcpu_init = vec![
429 VcpuInitRiscv64::new(GuestAddress(fdt_offset + RISCV64_PHYS_MEM_START));
430 vcpu_count
431 ];
432
433 Ok(RunnableLinuxVm {
434 vm,
435 vcpu_count: components.vcpu_properties.len(),
436 vcpus: Some(vcpus),
437 vcpu_init,
438 vcpu_affinity: components.vcpu_affinity,
439 no_smt: false,
440 irq_chip,
441 hypercall_bus,
442 io_bus,
443 mmio_bus,
444 pid_debug_label_map,
445 resume_notify_devices: Vec::new(),
446 root_config: pci_root,
447 platform_devices,
448 hotplug_bus: BTreeMap::new(),
449 rt_cpus: components.rt_cpus,
450 delay_rt: components.delay_rt,
451 suspend_tube: (Arc::new(Mutex::new(suspend_tube_send)), suspend_tube_recv),
452 bat_control: None,
453 pm: None,
454 devices_thread: None,
455 vm_request_tubes: Vec::new(),
456 })
457 }
458
459 fn configure_vcpu(
460 _vm: &dyn Vm,
461 _hypervisor: &dyn Hypervisor,
462 _irq_chip: &dyn IrqChipRiscv64,
463 vcpu: &dyn VcpuRiscv64,
464 _vcpu_init: VcpuInitRiscv64,
465 vcpu_id: usize,
466 _num_vcpus: usize,
467 cpu_config: Option<CpuConfigRiscv64>,
468 ) -> std::result::Result<(), Self::Error> {
469 vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::Pc), get_kernel_addr().0)
470 .map_err(Self::Error::SetReg)?;
471 vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::A0), vcpu_id as u64)
472 .map_err(Self::Error::SetReg)?;
473 vcpu.set_one_reg(
474 VcpuRegister::Core(CoreRegister::A1),
475 cpu_config.unwrap().fdt_address.0,
476 )
477 .map_err(Self::Error::SetReg)?;
478
479 Ok(())
480 }
481
482 fn register_pci_device(
483 _linux: &mut RunnableLinuxVm,
484 _device: Box<dyn PciDevice>,
485 _minijail: Option<Minijail>,
486 _resources: &mut SystemAllocator,
487 _tube: &mpsc::Sender<PciRootCommand>,
488 #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
489 ) -> std::result::Result<PciAddress, Self::Error> {
490 Err(Error::Unsupported)
492 }
493
494 fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>> {
495 Ok(BTreeMap::new())
496 }
497
498 fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>> {
499 Ok(BTreeMap::new())
500 }
501
502 fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>> {
503 Ok(BTreeMap::new())
504 }
505
506 fn get_host_cpu_clusters() -> Result<Vec<CpuSet>> {
507 Ok(Vec::new())
508 }
509}
510
/// GDB stub operations for riscv64.
///
/// Debugging is not implemented for this architecture. Every operation returns
/// [`Error::Unsupported`] so that a debugger request yields an error instead of a panic.
#[cfg(feature = "gdb")]
impl arch::GdbOps for Riscv64 {
    type Error = Error;

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn read_memory(
        _vcpu: &dyn VcpuRiscv64,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _len: usize,
    ) -> Result<Vec<u8>> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn write_memory(
        _vcpu: &dyn VcpuRiscv64,
        _guest_mem: &GuestMemory,
        _vaddr: GuestAddress,
        _buf: &[u8],
    ) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn read_registers(_vcpu: &dyn VcpuRiscv64) -> Result<<GdbArch as Arch>::Registers> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn write_registers(
        _vcpu: &dyn VcpuRiscv64,
        _regs: &<GdbArch as Arch>::Registers,
    ) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn read_register(
        _vcpu: &dyn VcpuRiscv64,
        _reg_id: <GdbArch as Arch>::RegId,
    ) -> Result<Vec<u8>> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn write_register(
        _vcpu: &dyn VcpuRiscv64,
        _reg_id: <GdbArch as Arch>::RegId,
        _data: &[u8],
    ) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn enable_singlestep(_vcpu: &dyn VcpuRiscv64) -> Result<()> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn get_max_hw_breakpoints(_vcpu: &dyn VcpuRiscv64) -> Result<usize> {
        Err(Error::Unsupported)
    }

    /// Not supported on riscv64; always returns [`Error::Unsupported`].
    fn set_hw_breakpoints(_vcpu: &dyn VcpuRiscv64, _breakpoints: &[GuestAddress]) -> Result<()> {
        Err(Error::Unsupported)
    }
}
571
572fn get_high_mmio_base_size(mem_size: u64, guest_phys_addr_bits: u8) -> (u64, u64) {
573 let guest_phys_end = 1u64 << guest_phys_addr_bits;
574 let high_mmio_base = RISCV64_PHYS_MEM_START + mem_size;
575 let size = guest_phys_end
576 .checked_sub(high_mmio_base)
577 .unwrap_or_else(|| {
578 panic!("guest_phys_end {guest_phys_end:#x} < high_mmio_base {high_mmio_base:#x}",);
579 });
580 (high_mmio_base, size)
581}
582
583fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
584 let mut cmdline = kernel_cmdline::Cmdline::new();
585 cmdline.insert_str("panic=-1").unwrap();
586 cmdline
587}