use std::collections::BTreeMap;
use std::mem;
use std::result;
use base::warn;
use hypervisor::Sregs;
use hypervisor::VcpuX86_64;
use hypervisor::Vm;
use remain::sorted;
use thiserror::Error;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use crate::gdt;
#[sorted]
#[derive(Error, Debug)]
/// Errors raised while initializing x86_64 vCPU registers and writing the
/// boot GDT/IDT/page-table structures into guest memory.
///
/// `#[sorted]` (remain) enforces alphabetical variant order at compile time.
pub enum Error {
/// The hypervisor ioctl to read special registers failed.
#[error("failed to get sregs for this cpu: {0}")]
GetSRegsIoctlFailed(base::Error),
/// The hypervisor ioctl to read general-purpose registers failed.
#[error("failed to get base registers for this cpu: {0}")]
GettingRegistersIoctl(base::Error),
/// The hypervisor ioctl to write special registers failed.
#[error("failed to set sregs for this cpu: {0}")]
SetSRegsIoctlFailed(base::Error),
/// The hypervisor ioctl to write general-purpose registers failed.
#[error("failed to set base registers for this cpu: {0}")]
SettingRegistersIoctl(base::Error),
/// Writing a boot GDT entry into guest memory failed.
#[error("writing the GDT to RAM failed")]
WriteGDTFailure,
/// Writing the boot IDT entry into guest memory failed.
#[error("writing the IDT to RAM failed")]
WriteIDTFailure,
/// Writing a page-directory entry into guest memory failed.
#[error("writing PDE to RAM failed")]
WritePDEAddress,
/// Writing a page-directory-pointer entry into guest memory failed.
#[error("writing PDPTE to RAM failed")]
WritePDPTEAddress,
/// Writing the PML4 entry into guest memory failed.
#[error("writing PML4 to RAM failed")]
WritePML4Address,
}
pub type Result<T> = result::Result<T, Error>;
// MTRR memory types placed in the low byte of a PhysBase MSR:
// uncacheable and write-back respectively.
const MTRR_MEMTYPE_UC: u8 = 0x0;
const MTRR_MEMTYPE_WB: u8 = 0x6;
// Bit 11 of a variable-range PhysMask MSR: the range is valid/enabled.
const MTRR_VAR_VALID: u64 = 0x800;
// Bit 11 of MSR_MTRRdefType: global MTRR enable.
const MTRR_ENABLE: u64 = 0x800;
// MSR numbers of the first variable-range pair (MTRRphysBase0/MTRRphysMask0);
// subsequent pairs follow at +2 per range.
const MTRR_PHYS_BASE_MSR: u32 = 0x200;
const MTRR_PHYS_MASK_MSR: u32 = 0x201;
// VCNT field of MSR_MTRRcap: count of variable-range MTRRs supported.
const VAR_MTRR_NUM_MASK: u64 = 0xFF;
/// Returns the largest power of two that is less than or equal to `data`.
///
/// `data` must be non-zero. The original expression
/// `1 << (64 - data.leading_zeros() - 1)` underflowed the shift amount for
/// `data == 0`, producing a confusing arithmetic-overflow panic in debug
/// builds; the precondition is now checked explicitly so misuse fails with a
/// clear message, and the shift amount is written directly as the index of
/// the highest set bit.
fn get_power_of_two(data: u64) -> u64 {
    debug_assert!(data != 0, "get_power_of_two requires a non-zero input");
    // The highest set bit of `data` is bit `63 - leading_zeros`.
    1 << (63 - data.leading_zeros())
}
/// Returns the largest power-of-two chunk size, no bigger than `len`, that is
/// naturally aligned at `base` (i.e. `base` is a multiple of the result).
fn get_max_len(base: u64, len: u64) -> u64 {
    let mut size = get_power_of_two(len);
    // Halve until the chunk is aligned at `base`; a 1-byte chunk always is.
    while base % size != 0 {
        size /= 2;
    }
    size
}
/// Splits `[base, base + len)` into `(start, size)` chunks where every chunk
/// is a power-of-two size naturally aligned at its start — the shape required
/// by variable-range MTRRs.
fn get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)> {
    let mut pairs = Vec::new();
    let mut start = base;
    let mut left = len;
    while left > 0 {
        let chunk = get_max_len(start, left);
        pairs.push((start, chunk));
        start += chunk;
        left -= chunk;
    }
    pairs
}
/// Queries MSR_MTRRcap for the number of variable-range MTRRs this vCPU
/// supports. Returns 0 (and logs a warning) when the MSR cannot be read.
pub fn vcpu_supported_variable_mtrrs(vcpu: &dyn VcpuX86_64) -> usize {
    vcpu.get_msr(crate::msr_index::MSR_MTRRcap)
        .map(|cap| (cap & VAR_MTRR_NUM_MASK) as usize)
        .unwrap_or_else(|_| {
            warn!("failed to get MSR_MTRRcap, guests with passthrough devices may be very slow");
            0
        })
}
/// Returns true if `id` is an MTRR-related MSR: either within the
/// variable-range PhysBase/PhysMask window or the MTRR default-type MSR.
pub fn is_mtrr_msr(id: u32) -> bool {
    let last_variable_msr = MTRR_PHYS_BASE_MSR + 2 * VAR_MTRR_NUM_MASK as u32;
    (MTRR_PHYS_BASE_MSR..=last_variable_msr).contains(&id)
        || id == crate::msr_index::MSR_MTRRdefType
}
/// Counts how many variable-range MTRR pairs are present in `msrs`
/// (each pair contributes two MSR entries, hence the division by two).
pub fn count_variable_mtrrs(msrs: &BTreeMap<u32, u64>) -> usize {
    let mtrr_msr_count = msrs.keys().copied().filter(|&id| is_mtrr_msr(id)).count();
    mtrr_msr_count / 2
}
/// Inserts MTRR MSR values into `msrs` that mark the PCI MMIO hole
/// (`pci_start` up to 4 GiB) as uncacheable, with write-back as the default
/// memory type for everything else.
pub fn set_mtrr_msrs(msrs: &mut BTreeMap<u32, u64>, vm: &dyn Vm, pci_start: u64) {
    let pci_len = (1 << 32) - pci_start;
    // Mask of the guest's addressable physical bits, used in PhysMask MSRs.
    let phys_mask: u64 = (1 << vm.get_guest_phys_addr_bits()) - 1;
    for (slot, (base, len)) in get_mtrr_pairs(pci_start, pci_len).into_iter().enumerate() {
        let msr_offset = 2 * slot as u32;
        // PhysBase: range start with the UC memory type in the low bits.
        msrs.insert(MTRR_PHYS_BASE_MSR + msr_offset, base | MTRR_MEMTYPE_UC as u64);
        // PhysMask: two's complement of the power-of-two length, truncated to
        // the guest's physical-address width, plus the valid bit.
        let mask = (len.wrapping_neg() & phys_mask) | MTRR_VAR_VALID;
        msrs.insert(MTRR_PHYS_MASK_MSR + msr_offset, mask);
    }
    // Default type: MTRRs enabled, write-back everywhere not covered above.
    msrs.insert(
        crate::msr_index::MSR_MTRRdefType,
        MTRR_ENABLE | MTRR_MEMTYPE_WB as u64,
    );
}
/// Inserts baseline MSR values: TSC reset to zero and fast string
/// operations enabled.
pub fn set_default_msrs(msrs: &mut BTreeMap<u32, u64>) {
    msrs.extend([
        (crate::msr_index::MSR_IA32_TSC, 0x0),
        (
            crate::msr_index::MSR_IA32_MISC_ENABLE,
            crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
        ),
    ]);
}
/// Inserts the MSR values used when starting a vCPU directly in long mode:
/// the syscall/sysenter MSRs and TSC are zeroed (the guest kernel programs
/// them itself), and fast string operations are enabled.
pub fn set_long_mode_msrs(msrs: &mut BTreeMap<u32, u64>) {
    let zeroed_msrs = [
        crate::msr_index::MSR_IA32_SYSENTER_CS,
        crate::msr_index::MSR_IA32_SYSENTER_ESP,
        crate::msr_index::MSR_IA32_SYSENTER_EIP,
        crate::msr_index::MSR_STAR,
        crate::msr_index::MSR_CSTAR,
        crate::msr_index::MSR_KERNEL_GS_BASE,
        crate::msr_index::MSR_SYSCALL_MASK,
        crate::msr_index::MSR_LSTAR,
        crate::msr_index::MSR_IA32_TSC,
    ];
    for msr in zeroed_msrs {
        msrs.insert(msr, 0x0);
    }
    msrs.insert(
        crate::msr_index::MSR_IA32_MISC_ENABLE,
        crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
    );
}
// Control-register and EFER bits used when configuring protected/long mode.
const X86_CR0_PE: u64 = 0x1; // CR0: protected-mode enable.
const X86_CR0_PG: u64 = 0x80000000; // CR0: paging enable.
const X86_CR4_PAE: u64 = 0x20; // CR4: physical address extension.
const EFER_LME: u64 = 0x100; // EFER: long-mode enable.
const EFER_LMA: u64 = 0x400; // EFER: long-mode active.
// Fixed guest-physical addresses where the boot GDT and IDT are written.
const BOOT_GDT_OFFSET: u64 = 0x1500;
const BOOT_IDT_OFFSET: u64 = 0x1528;
/// Writes the GDT entries in `table` to guest memory starting at
/// `BOOT_GDT_OFFSET`, one `u64` per entry.
///
/// Returns `Error::WriteGDTFailure` if an entry's address overflows, falls
/// outside guest memory, or the write itself fails.
fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
    let entry_size = mem::size_of::<u64>();
    for (i, &entry) in table.iter().enumerate() {
        let addr = GuestAddress(BOOT_GDT_OFFSET)
            .checked_add((i * entry_size) as u64)
            .ok_or(Error::WriteGDTFailure)?;
        // Reject entries that would land outside the guest's memory map.
        if !guest_mem.is_valid_range(addr, entry_size as u64) {
            return Err(Error::WriteGDTFailure);
        }
        guest_mem
            .write_obj_at_addr(entry, addr)
            .map_err(|_| Error::WriteGDTFailure)?;
    }
    Ok(())
}
fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
guest_mem
.write_obj_at_addr(val, boot_idt_addr)
.map_err(|_| Error::WriteIDTFailure)
}
/// Writes a boot GDT and null IDT into guest memory and fills `sregs` with
/// segment registers for entering 64-bit long mode (CR0.PE and EFER.LME set;
/// paging is configured separately by `setup_page_tables`).
pub fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // Entries: 0-1 null, 2 = 64-bit code, 3 = data, 4 = TSS, 5 = padding.
    let gdt_table: [u64; 6] = [
        gdt::gdt_entry(0, 0, 0),
        gdt::gdt_entry(0, 0, 0),
        gdt::gdt_entry(0xa09b, 0, 0xfffff),
        gdt::gdt_entry(0xc093, 0, 0xfffff),
        gdt::gdt_entry(0x808b, 0, 0xfffff),
        0,
    ];
    write_gdt_table(&gdt_table, mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET;
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    // One null IDT entry; the guest kernel installs its own IDT later.
    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET;
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = gdt::segment_from_gdt(gdt_table[2], 2);
    let data = gdt::segment_from_gdt(gdt_table[3], 3);
    // All flat data segments share the same descriptor.
    for seg in [
        &mut sregs.ds,
        &mut sregs.es,
        &mut sregs.fs,
        &mut sregs.gs,
        &mut sregs.ss,
    ] {
        *seg = data;
    }
    sregs.tr = gdt::segment_from_gdt(gdt_table[4], 4);

    // Protected mode on; long mode enabled (activated once paging is on).
    sregs.cr0 |= X86_CR0_PE;
    sregs.efer |= EFER_LME;
    Ok(())
}
/// Writes a boot GDT and null IDT into guest memory and fills `sregs` with
/// segment registers for flat 32-bit protected mode, with paging explicitly
/// disabled.
pub fn configure_segments_and_sregs_flat32(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // Entries: 0-1 null, 2 = 32-bit code, 3 = data, 4 = TSS.
    let gdt_table: [u64; 5] = [
        gdt::gdt_entry(0, 0, 0),
        gdt::gdt_entry(0, 0, 0),
        gdt::gdt_entry(0xc09b, 0, 0xfffff),
        gdt::gdt_entry(0xc093, 0, 0xfffff),
        gdt::gdt_entry(0x808b, 0, 0xfffff),
    ];
    write_gdt_table(&gdt_table, mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET;
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    // One null IDT entry; the guest kernel installs its own IDT later.
    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET;
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = gdt::segment_from_gdt(gdt_table[2], 2);
    let data = gdt::segment_from_gdt(gdt_table[3], 3);
    // All flat data segments share the same descriptor.
    for seg in [
        &mut sregs.ds,
        &mut sregs.es,
        &mut sregs.fs,
        &mut sregs.gs,
        &mut sregs.ss,
    ] {
        *seg = data;
    }
    sregs.tr = gdt::segment_from_gdt(gdt_table[4], 4);

    // Protected mode on, paging off.
    sregs.cr0 |= X86_CR0_PE;
    sregs.cr0 &= !X86_CR0_PG;
    Ok(())
}
/// Identity-maps the first 4 GiB of guest physical memory using 2 MiB pages
/// and enables PAE paging for 64-bit long mode in `sregs`.
///
/// Layout written to guest RAM:
///   0x9000: PML4 with one entry pointing at the PDPTE table
///   0xa000: PDPTE table with 4 entries, each pointing at a PD table
///   0xb000..0xf000: 4 PD tables of 512 entries, each mapping a 2 MiB page
pub fn setup_page_tables(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
let boot_pml4_addr = GuestAddress(0x9000);
let boot_pdpte_addr = GuestAddress(0xa000);
let boot_pde_addr = GuestAddress(0xb000);
// 0x03 = present | writable: entry references another table.
const PDE_FLAGS_TABLE_REFERENCE: u64 = 0x03;
// 0x83 = present | writable | PS: entry maps a 2 MiB page directly.
const PDE_FLAGS_PAGE_MAPPING: u64 = 0x83;
// Single PML4 entry pointing at the PDPTE table.
mem.write_obj_at_addr(
boot_pdpte_addr.offset() | PDE_FLAGS_TABLE_REFERENCE,
boot_pml4_addr,
)
.map_err(|_| Error::WritePML4Address)?;
// Four PDPTE entries, each covering 1 GiB, for 4 GiB total.
for i in 0..4 {
let pde_addr = boot_pde_addr.unchecked_add(i * 0x1000);
mem.write_obj_at_addr(
pde_addr.offset() | PDE_FLAGS_TABLE_REFERENCE,
boot_pdpte_addr.unchecked_add(i * 8),
)
.map_err(|_| Error::WritePDPTEAddress)?;
// 512 PDEs per table, each mapping page address i * 1 GiB + j * 2 MiB.
for j in 0..512 {
mem.write_obj_at_addr(
(i << 30) | (j << 21) | PDE_FLAGS_PAGE_MAPPING,
pde_addr.unchecked_add(j * 8),
)
.map_err(|_| Error::WritePDEAddress)?;
}
}
// Point CR3 at the PML4 and turn on PAE + paging; LMA marks long mode
// as active.
sregs.cr3 = boot_pml4_addr.offset();
sregs.cr4 |= X86_CR4_PAE;
sregs.cr0 |= X86_CR0_PG;
sregs.efer |= EFER_LMA;
Ok(())
}
#[cfg(test)]
mod tests {
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use super::*;
// Single 64 KiB region at guest physical 0 — enough for the boot GDT/IDT
// (0x1500/0x1528) and the page tables (0x9000..0xf000).
fn create_guest_mem() -> GuestMemory {
GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap()
}
// Reads a u64 back out of guest memory at `offset`.
fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
let read_addr = GuestAddress(offset);
gm.read_obj_from_addr(read_addr).unwrap()
}
#[test]
fn segments_and_sregs() {
let mut sregs = Default::default();
let gm = create_guest_mem();
configure_segments_and_sregs(&gm, &mut sregs).unwrap();
// Raw GDT entries as written to RAM: null, code, data, TSS descriptors.
assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
assert_eq!(0xaf9b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x10));
assert_eq!(0xcf93000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x18));
assert_eq!(0x8f8b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x20));
// The IDT entry is null.
assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));
// Segment registers decoded from the GDT: selectors, limits, flags.
assert_eq!(0, sregs.cs.base);
assert_eq!(0xffffffff, sregs.ds.limit_bytes);
assert_eq!(0x10, sregs.cs.selector);
assert_eq!(0x18, sregs.ds.selector);
assert_eq!(0x18, sregs.es.selector);
assert_eq!(0x18, sregs.ss.selector);
assert_eq!(1, sregs.fs.present);
assert_eq!(1, sregs.gs.g);
assert_eq!(0, sregs.ss.avl);
assert_eq!(0, sregs.tr.base);
assert_eq!(0xffffffff, sregs.tr.limit_bytes);
assert_eq!(0, sregs.tr.avl);
// Protected mode and long-mode-enable must be set.
assert_eq!(X86_CR0_PE, sregs.cr0 & X86_CR0_PE);
assert_eq!(EFER_LME, sregs.efer);
}
#[test]
fn page_tables() {
let mut sregs = Default::default();
let gm = create_guest_mem();
setup_page_tables(&gm, &mut sregs).unwrap();
// PML4 -> PDPTE and PDPTE -> first PD, both with table-reference flags.
assert_eq!(0xa003, read_u64(&gm, 0x9000));
assert_eq!(0xb003, read_u64(&gm, 0xa000));
// First PD: 512 consecutive 2 MiB page mappings with flags 0x83.
for i in 0..512 {
assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
}
// CR3 points at the PML4; PAE and paging are enabled.
assert_eq!(0x9000, sregs.cr3);
assert_eq!(X86_CR4_PAE, sregs.cr4);
assert_eq!(X86_CR0_PG, sregs.cr0 & X86_CR0_PG);
}
}