// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::BTreeMap;
use std::mem;
use std::result;

use base::warn;
use hypervisor::Sregs;
use hypervisor::VcpuX86_64;
use hypervisor::Vm;
use remain::sorted;
use thiserror::Error;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;

use crate::gdt;

#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    /// Failed to get sregs for this cpu.
    #[error("failed to get sregs for this cpu: {0}")]
    GetSRegsIoctlFailed(base::Error),
    /// Failed to get base registers for this cpu.
    #[error("failed to get base registers for this cpu: {0}")]
    GettingRegistersIoctl(base::Error),
    /// Failed to set sregs for this cpu.
    #[error("failed to set sregs for this cpu: {0}")]
    SetSRegsIoctlFailed(base::Error),
    /// Failed to set base registers for this cpu.
    #[error("failed to set base registers for this cpu: {0}")]
    SettingRegistersIoctl(base::Error),
    /// Writing the GDT to RAM failed.
    #[error("writing the GDT to RAM failed")]
    WriteGDTFailure,
    /// Writing the IDT to RAM failed.
    #[error("writing the IDT to RAM failed")]
    WriteIDTFailure,
    /// Writing PDE to RAM failed.
    #[error("writing PDE to RAM failed")]
    WritePDEAddress,
    /// Writing PDPTE to RAM failed.
    #[error("writing PDPTE to RAM failed")]
    WritePDPTEAddress,
    /// Writing PML4 to RAM failed.
    #[error("writing PML4 to RAM failed")]
    WritePML4Address,
}

pub type Result<T> = result::Result<T, Error>;

const MTRR_MEMTYPE_UC: u8 = 0x0;
const MTRR_MEMTYPE_WB: u8 = 0x6;
const MTRR_VAR_VALID: u64 = 0x800;
const MTRR_ENABLE: u64 = 0x800;
const MTRR_PHYS_BASE_MSR: u32 = 0x200;
const MTRR_PHYS_MASK_MSR: u32 = 0x201;
const VAR_MTRR_NUM_MASK: u64 = 0xFF;

// Returns the value of the highest set bit in a 64-bit value. Equivalent to
// 1 << HighBitSet(x). `data` must be non-zero.
fn get_power_of_two(data: u64) -> u64 {
    1 << (64 - data.leading_zeros() - 1)
}
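
// For example (an illustrative note, not from the original source):
// get_power_of_two(0x6000) == 0x4000, since the highest set bit of 0x6000 is
// bit 14 and 1 << 14 == 0x4000.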

// Returns the maximum length suitable for an MTRR setting based on the
// specified (base, len): the largest power of two that is no greater than len
// and evenly divides base.
fn get_max_len(base: u64, len: u64) -> u64 {
    let mut ret = get_power_of_two(len);

    while base % ret != 0 {
        ret >>= 1;
    }

    ret
}
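
// For example (illustrative, not from the original source): with base =
// 0xd000_0000 and len = 0x3000_0000, the highest power of two in len is
// 0x2000_0000, but 0xd000_0000 is only 0x1000_0000-aligned, so get_max_len
// returns 0x1000_0000.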

// For the specified (base, len), returns the list of (base, len) pairs that can
// be programmed into MTRR registers. MTRRs require that each base address be
// aligned to at least its region's length.
fn get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)> {
    let mut vecs = Vec::new();

    let mut remains = len;
    let mut new = base;
    while remains != 0 {
        let max = get_max_len(new, remains);
        vecs.push((new, max));
        remains -= max;
        new += max;
    }

    vecs
}
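
// Continuing the example above (illustrative only): get_mtrr_pairs(0xd000_0000,
// 0x3000_0000) yields [(0xd000_0000, 0x1000_0000), (0xe000_0000, 0x2000_0000)];
// each pair's base is aligned to its length. The mtrr_pairs test at the bottom
// of this file exercises these values.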

/// Returns the number of variable MTRR entries supported by `vcpu`.
pub fn vcpu_supported_variable_mtrrs(vcpu: &dyn VcpuX86_64) -> usize {
    // Get VAR MTRR num from MSR_MTRRcap
    match vcpu.get_msr(crate::msr_index::MSR_MTRRcap) {
        Ok(value) => (value & VAR_MTRR_NUM_MASK) as usize,
        Err(_e) => {
            warn!("failed to get MSR_MTRRcap, guests with passthrough devices may be very slow");
            0
        }
    }
}

/// Returns `true` if the given MSR `id` is an MTRR entry.
pub fn is_mtrr_msr(id: u32) -> bool {
    // Variable MTRR MSRs are pairs starting at 0x200 (MTRR_PHYS_BASE_MSR) / 0x201
    // (MTRR_PHYS_MASK_MSR) and extending up to 0xFF pairs at most.
    (id >= MTRR_PHYS_BASE_MSR && id <= MTRR_PHYS_BASE_MSR + 2 * VAR_MTRR_NUM_MASK as u32)
        || id == crate::msr_index::MSR_MTRRdefType
}

/// Returns the count of variable MTRR entries specified by the list of `msrs`.
pub fn count_variable_mtrrs(msrs: &BTreeMap<u32, u64>) -> usize {
    // Each variable MTRR takes up two MSRs (base + mask), so divide by 2. This will also count the
    // MTRRdefType entry, but that is only one extra and the division truncates, so it won't affect
    // the final count.
    msrs.keys().filter(|&msr| is_mtrr_msr(*msr)).count() / 2
}
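
// For example (illustrative only, not from the original source): two variable
// MTRRs contribute four base/mask MSRs; with MTRRdefType also present, the
// filter above counts five keys, and 5 / 2 truncates back to 2.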

/// Inserts MSRs describing the MTRR configuration into `msrs`.
pub fn set_mtrr_msrs(msrs: &mut BTreeMap<u32, u64>, vm: &dyn Vm, pci_start: u64) {
    // Set pci_start..4G as UC; everything else keeps the default type of WB.
    let pci_len = (1 << 32) - pci_start;
    let vecs = get_mtrr_pairs(pci_start, pci_len);

    let phys_mask: u64 = (1 << vm.get_guest_phys_addr_bits()) - 1;
    for (idx, (base, len)) in vecs.iter().enumerate() {
        let reg_idx = idx as u32 * 2;
        msrs.insert(MTRR_PHYS_BASE_MSR + reg_idx, base | MTRR_MEMTYPE_UC as u64);
        let mask: u64 = (len.wrapping_neg() & phys_mask) | MTRR_VAR_VALID;
        msrs.insert(MTRR_PHYS_MASK_MSR + reg_idx, mask);
    }
    // Disable fixed MTRRs and enable variable MTRRs; set the default type to WB.
    msrs.insert(
        crate::msr_index::MSR_MTRRdefType,
        MTRR_ENABLE | MTRR_MEMTYPE_WB as u64,
    );
}
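
// A worked example (illustrative values, not from the original source): with
// pci_start = 0xc000_0000 and 36 guest physical address bits, pci_len is
// 0x4000_0000 and one (base, len) pair covers it, so MSR 0x200 becomes
// 0xc000_0000 (base | UC) and MSR 0x201 becomes 0xf_c000_0800 (-len masked to
// 36 bits, with the valid bit set).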

/// Inserts the default values of MSRs at reset into `msrs`.
///
/// Sets IA32_TSC to 0 and enables fast-string operations via IA32_MISC_ENABLE.
pub fn set_default_msrs(msrs: &mut BTreeMap<u32, u64>) {
    msrs.insert(crate::msr_index::MSR_IA32_TSC, 0x0);
    msrs.insert(
        crate::msr_index::MSR_IA32_MISC_ENABLE,
        crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
    );
}

/// Configures model-specific registers for long (64-bit) mode.
pub fn set_long_mode_msrs(msrs: &mut BTreeMap<u32, u64>) {
    msrs.insert(crate::msr_index::MSR_IA32_SYSENTER_CS, 0x0);
    msrs.insert(crate::msr_index::MSR_IA32_SYSENTER_ESP, 0x0);
    msrs.insert(crate::msr_index::MSR_IA32_SYSENTER_EIP, 0x0);

    // x86_64-specific MSRs; we only run on x86_64, not x86.
    msrs.insert(crate::msr_index::MSR_STAR, 0x0);
    msrs.insert(crate::msr_index::MSR_CSTAR, 0x0);
    msrs.insert(crate::msr_index::MSR_KERNEL_GS_BASE, 0x0);
    msrs.insert(crate::msr_index::MSR_SYSCALL_MASK, 0x0);
    msrs.insert(crate::msr_index::MSR_LSTAR, 0x0);
    // end of x86_64-specific code

    msrs.insert(crate::msr_index::MSR_IA32_TSC, 0x0);
    msrs.insert(
        crate::msr_index::MSR_IA32_MISC_ENABLE,
        crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
    );
}

const X86_CR0_PE: u64 = 0x1;
const X86_CR0_PG: u64 = 0x80000000;
const X86_CR4_PAE: u64 = 0x20;

const EFER_LME: u64 = 0x100;
const EFER_LMA: u64 = 0x400;

const BOOT_GDT_OFFSET: u64 = 0x1500;
const BOOT_IDT_OFFSET: u64 = 0x1528;

fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
    let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
    for (index, entry) in table.iter().enumerate() {
        let addr = boot_gdt_addr
            .checked_add((index * mem::size_of::<u64>()) as u64)
            .ok_or(Error::WriteGDTFailure)?;
        if !guest_mem.is_valid_range(addr, mem::size_of::<u64>() as u64) {
            return Err(Error::WriteGDTFailure);
        }

        guest_mem
            .write_obj_at_addr(*entry, addr)
            .map_err(|_| Error::WriteGDTFailure)?;
    }
    Ok(())
}

fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
    let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
    guest_mem
        .write_obj_at_addr(val, boot_idt_addr)
        .map_err(|_| Error::WriteIDTFailure)
}

/// Configures the GDT, IDT, and segment registers for long mode.
pub fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // reference: https://docs.kernel.org/arch/x86/boot.html?highlight=__BOOT_CS#id1
    let gdt_table: [u64; 6] = [
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE
        gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
        gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
        0,                                  // TSS (upper 32 bits of base)
    ];
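
    // A note on the flag values above (my reading of the standard x86 descriptor
    // layout, not a comment from the original source): the low byte is the access
    // byte and the high nibble holds G/D/L, so 0xa09b is a present ring-0 code
    // segment with G=1 and L=1 (64-bit), 0xc093 is a present ring-0 read/write
    // data segment with G=1 and D=1, and 0x808b is a present 64-bit TSS.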

    let code_seg = gdt::segment_from_gdt(gdt_table[2], 2);
    let data_seg = gdt::segment_from_gdt(gdt_table[3], 3);
    let tss_seg = gdt::segment_from_gdt(gdt_table[4], 4);

    // Write segments
    write_gdt_table(&gdt_table[..], mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET;
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET;
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    /* 64-bit protected mode */
    sregs.cr0 |= X86_CR0_PE;
    sregs.efer |= EFER_LME;

    Ok(())
}

/// Configures the GDT, IDT, and segment registers for 32-bit protected mode with paging disabled.
pub fn configure_segments_and_sregs_flat32(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // reference: https://docs.kernel.org/arch/x86/boot.html?highlight=__BOOT_CS#id1
    let gdt_table: [u64; 5] = [
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0xc09b, 0, 0xfffff), // CODE
        gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
        gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
    ];
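
    // As in the long-mode table (again my reading, not an original comment),
    // 0xc09b differs from 0xa09b only in the flags nibble: D=1 and L=0, i.e. a
    // 32-bit code segment rather than a 64-bit one.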

    let code_seg = gdt::segment_from_gdt(gdt_table[2], 2);
    let data_seg = gdt::segment_from_gdt(gdt_table[3], 3);
    let tss_seg = gdt::segment_from_gdt(gdt_table[4], 4);

    // Write segments
    write_gdt_table(&gdt_table[..], mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET;
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET;
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    /* 32-bit protected mode with paging disabled */
    sregs.cr0 |= X86_CR0_PE;
    sregs.cr0 &= !X86_CR0_PG;

    Ok(())
}

/// Configures the system page tables and control registers for long mode with paging.
/// Prepares an identity mapping for the low 4GB of memory.
pub fn setup_page_tables(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // Puts the PML4 right after the zero page, aligned to 4k.
    let boot_pml4_addr = GuestAddress(0x9000);
    let boot_pdpte_addr = GuestAddress(0xa000);
    let boot_pde_addr = GuestAddress(0xb000);

    const PDE_FLAGS_TABLE_REFERENCE: u64 = 0x03; // Present | Read/Write
    const PDE_FLAGS_PAGE_MAPPING: u64 = 0x83; // Present | Read/Write | Page Size

    // Entry covering VA [0..512GB)
    mem.write_obj_at_addr(
        boot_pdpte_addr.offset() | PDE_FLAGS_TABLE_REFERENCE,
        boot_pml4_addr,
    )
    .map_err(|_| Error::WritePML4Address)?;

    // Identity mapping for VA [0..4GB)
    for i in 0..4 {
        let pde_addr = boot_pde_addr.unchecked_add(i * 0x1000);

        // Entry covering a single 1GB VA area
        mem.write_obj_at_addr(
            pde_addr.offset() | PDE_FLAGS_TABLE_REFERENCE,
            boot_pdpte_addr.unchecked_add(i * 8),
        )
        .map_err(|_| Error::WritePDPTEAddress)?;

        // 512 2MB entries together covering a single 1GB VA area. Note we are assuming the
        // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
        for j in 0..512 {
            mem.write_obj_at_addr(
                (i << 30) | (j << 21) | PDE_FLAGS_PAGE_MAPPING,
                pde_addr.unchecked_add(j * 8),
            )
            .map_err(|_| Error::WritePDEAddress)?;
        }
    }

    sregs.cr3 = boot_pml4_addr.offset();
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PG;
    sregs.efer |= EFER_LMA; // Long mode is active. Must be auto-enabled with CR0_PG.
    Ok(())
}
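
// An illustrative walkthrough (not from the original source): for VA 0x4020_0000,
// the PML4 entry at 0x9000 points to the PDPT at 0xa000; PDPTE[1] at 0xa008
// points to the PD at 0xc000; and PDE[1] at 0xc008 maps a 2MB page at PA
// 0x4020_0000, so every address below 4GB is identity mapped.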

#[cfg(test)]
mod tests {
    use vm_memory::GuestAddress;
    use vm_memory::GuestMemory;

    use super::*;

    fn create_guest_mem() -> GuestMemory {
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap()
    }

    fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
        let read_addr = GuestAddress(offset);
        gm.read_obj_from_addr(read_addr).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
        assert_eq!(0xaf9b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x10));
        assert_eq!(0xcf93000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x18));
        assert_eq!(0x8f8b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x20));
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xffffffff, sregs.ds.limit_bytes);
        assert_eq!(0x10, sregs.cs.selector);
        assert_eq!(0x18, sregs.ds.selector);
        assert_eq!(0x18, sregs.es.selector);
        assert_eq!(0x18, sregs.ss.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xffffffff, sregs.tr.limit_bytes);
        assert_eq!(0, sregs.tr.avl);
        assert_eq!(X86_CR0_PE, sregs.cr0 & X86_CR0_PE);
        assert_eq!(EFER_LME, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        setup_page_tables(&gm, &mut sregs).unwrap();

        assert_eq!(0xa003, read_u64(&gm, 0x9000));
        assert_eq!(0xb003, read_u64(&gm, 0xa000));
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
        }

        assert_eq!(0x9000, sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0 & X86_CR0_PG);
    }
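
    // A sketch of an additional test (not in the original source) exercising the
    // MTRR pair decomposition; the addresses below are arbitrary example values.
    #[test]
    fn mtrr_pairs() {
        // A region whose base is aligned to its length needs a single pair.
        assert_eq!(
            get_mtrr_pairs(0xc000_0000, 0x4000_0000),
            vec![(0xc000_0000, 0x4000_0000)]
        );
        // A region with a less-aligned base is split so that every pair
        // satisfies base % len == 0.
        assert_eq!(
            get_mtrr_pairs(0xd000_0000, 0x3000_0000),
            vec![(0xd000_0000, 0x1000_0000), (0xe000_0000, 0x2000_0000)]
        );
    }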
}