1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::convert::TryFrom;
use std::mem;
use std::result;

use devices::PciAddress;
use devices::PciInterruptPin;
use remain::sorted;
use thiserror::Error;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use zerocopy::AsBytes;

use crate::mpspec::*;

/// Errors that can be returned while building the MP table in guest memory.
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    /// The end address of the MP table could not be computed because adding the table size to
    /// the start address overflowed.
    #[error("The MP table has too little address space to be stored")]
    AddressOverflow,
    /// The guest memory slice covering the MP table could not be obtained for zeroing.
    #[error("Failure while zeroing out the memory for the MP table")]
    Clear,
    /// The last byte of the MP table falls outside of guest memory.
    #[error("There was too little guest memory to store the MP table")]
    NotEnoughMemory,
    /// Writing a bus entry (PCI or ISA) to guest memory failed.
    #[error("Failure to write MP bus entry")]
    WriteMpcBus,
    /// Writing a processor entry to guest memory failed.
    #[error("Failure to write MP CPU entry")]
    WriteMpcCpu,
    /// Writing an interrupt source entry to guest memory failed, or a PCI IRQ number did not fit
    /// in the entry's 8-bit destination IRQ field.
    #[error("Failure to write MP interrupt source entry")]
    WriteMpcIntsrc,
    /// Writing the I/O APIC entry to guest memory failed.
    #[error("Failure to write MP ioapic entry")]
    WriteMpcIoapic,
    /// Writing a local interrupt source entry to guest memory failed.
    #[error("Failure to write MP local interrupt source entry")]
    WriteMpcLintsrc,
    /// Writing the MP configuration table header to guest memory failed.
    #[error("Failure to write MP table header")]
    WriteMpcTable,
    /// Writing the MP floating pointer structure to guest memory failed.
    #[error("Failure to write the MP floating pointer")]
    WriteMpfIntel,
}

/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = result::Result<T, Error>;

// Most of these constants are sourced from the Intel MP Spec 1.4.
// Signature stored in the MP floating pointer structure.
const SMP_MAGIC_IDENT: [u8; 4] = *b"_MP_";
// Signature stored in the MP configuration table header.
const MPC_SIGNATURE: [u8; 4] = *b"PCMP";
// Value stored in the `spec` field of the MP configuration table header.
const MPC_SPEC: i8 = 4;
// OEM and product identification strings stored in the MP configuration table header.
const MPC_OEM: [u8; 8] = *b"CROSVM  ";
const MPC_PRODUCT_ID: [u8; 12] = *b"000000000000";
// Space-padded bus type strings stored in the bus entries.
const BUS_TYPE_ISA: [u8; 6] = *b"ISA   ";
const BUS_TYPE_PCI: [u8; 6] = *b"PCI   ";
// source: linux/arch/x86/include/asm/apicdef.h
pub const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec00000;
// source: linux/arch/x86/include/asm/apicdef.h
pub const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee00000;
// APIC version reported in both the processor entries and the I/O APIC entry.
const APIC_VERSION: u8 = 0x14;
// Value stored in the `cpufeature` field of every processor entry.
const CPU_STEPPING: u32 = 0x600;
// Bits OR-ed together to form the `featureflag` field of every processor entry.
const CPU_FEATURE_APIC: u32 = 0x200;
const CPU_FEATURE_FPU: u32 = 0x001;
// Guest physical address (0x9fc00) at which the MP floating pointer structure is written.
const MPTABLE_START: u64 = 0x400 * 639; // Last 1k of Linux's 640k base RAM.

/// Returns the wrapping (modulo 256) sum of every byte of `v` as exposed by `AsBytes`.
fn compute_checksum<T: AsBytes>(v: &T) -> u8 {
    v.as_bytes()
        .iter()
        .fold(0u8, |sum, &byte| sum.wrapping_add(byte))
}

/// Computes the value `v.checksum` must hold for all bytes of `v` to sum to zero (modulo 256).
///
/// The value currently stored in `v.checksum` is subtracted from the byte sum first, so the
/// result does not depend on what the field holds when this is called.
fn mpf_intel_compute_checksum(v: &mpf_intel) -> u8 {
    let sum_without_checksum = compute_checksum(v).wrapping_sub(v.checksum);
    // Two's complement negation: the byte that brings the total back to zero.
    sum_without_checksum.wrapping_neg()
}

/// Returns the number of bytes reserved for the MP table of a guest with `num_cpus` CPUs.
///
/// The estimate covers the floating pointer structure, the table header, one processor entry
/// per CPU, one I/O APIC entry, two bus entries, seventeen interrupt source entries and two
/// local interrupt source entries.
fn compute_mp_size(num_cpus: u8) -> usize {
    let floating_pointer = mem::size_of::<mpf_intel>();
    let header = mem::size_of::<mpc_table>();
    let cpus = mem::size_of::<mpc_cpu>() * usize::from(num_cpus);
    let ioapic = mem::size_of::<mpc_ioapic>();
    let buses = mem::size_of::<mpc_bus>() * 2;
    let intsrcs = mem::size_of::<mpc_intsrc>() * (1 + 16);
    let lintsrcs = mem::size_of::<mpc_lintsrc>() * 2;

    floating_pointer + header + cpus + ioapic + buses + intsrcs + lintsrcs
}

/// Performs setup of the MP table for the given `num_cpus`.
pub fn setup_mptable(
    mem: &GuestMemory,
    num_cpus: u8,
    pci_irqs: &[(PciAddress, u32, PciInterruptPin)],
) -> Result<()> {
    // Used to keep track of the next base pointer into the MP table.
    let mut base_mp = GuestAddress(MPTABLE_START);

    // Calculate ISA bus number in the system, report at least one PCI bus.
    let isa_bus_id = match pci_irqs.iter().max_by_key(|v| v.0.bus) {
        Some(pci_irq) => pci_irq.0.bus + 1,
        _ => 1,
    };
    let mp_size = compute_mp_size(num_cpus);

    // The checked_add here ensures the all of the following base_mp.unchecked_add's will be without
    // overflow.
    if let Some(end_mp) = base_mp.checked_add(mp_size as u64 - 1) {
        if !mem.address_in_range(end_mp) {
            return Err(Error::NotEnoughMemory);
        }
    } else {
        return Err(Error::AddressOverflow);
    }

    mem.get_slice_at_addr(base_mp, mp_size)
        .map_err(|_| Error::Clear)?
        .write_bytes(0);

    {
        let size = mem::size_of::<mpf_intel>();
        let mut mpf_intel = mpf_intel::default();
        mpf_intel.signature = SMP_MAGIC_IDENT;
        mpf_intel.length = 1;
        mpf_intel.specification = 4;
        mpf_intel.physptr = (base_mp.offset() + mem::size_of::<mpf_intel>() as u64) as u32;
        mpf_intel.checksum = mpf_intel_compute_checksum(&mpf_intel);
        mem.write_obj_at_addr(mpf_intel, base_mp)
            .map_err(|_| Error::WriteMpfIntel)?;
        base_mp = base_mp.unchecked_add(size as u64);
    }

    // We set the location of the mpc_table here but we can't fill it out until we have the length
    // of the entire table later.
    let table_base = base_mp;
    base_mp = base_mp.unchecked_add(mem::size_of::<mpc_table>() as u64);

    let mut checksum: u8 = 0;
    let ioapicid: u8 = num_cpus + 1;

    for cpu_id in 0..num_cpus {
        let size = mem::size_of::<mpc_cpu>();
        let mpc_cpu = mpc_cpu {
            type_: MP_PROCESSOR as u8,
            apicid: cpu_id,
            apicver: APIC_VERSION,
            cpuflag: CPU_ENABLED as u8
                | if cpu_id == 0 {
                    CPU_BOOTPROCESSOR as u8
                } else {
                    0
                },
            cpufeature: CPU_STEPPING,
            featureflag: CPU_FEATURE_APIC | CPU_FEATURE_FPU,
            ..Default::default()
        };
        mem.write_obj_at_addr(mpc_cpu, base_mp)
            .map_err(|_| Error::WriteMpcCpu)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_cpu));
    }
    {
        let size = mem::size_of::<mpc_ioapic>();
        let mpc_ioapic = mpc_ioapic {
            type_: MP_IOAPIC as u8,
            apicid: ioapicid,
            apicver: APIC_VERSION,
            flags: MPC_APIC_USABLE as u8,
            apicaddr: IO_APIC_DEFAULT_PHYS_BASE,
        };
        mem.write_obj_at_addr(mpc_ioapic, base_mp)
            .map_err(|_| Error::WriteMpcIoapic)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_ioapic));
    }
    for pci_bus_id in 0..isa_bus_id {
        let size = mem::size_of::<mpc_bus>();
        let mpc_bus = mpc_bus {
            type_: MP_BUS as u8,
            busid: pci_bus_id,
            bustype: BUS_TYPE_PCI,
        };
        mem.write_obj_at_addr(mpc_bus, base_mp)
            .map_err(|_| Error::WriteMpcBus)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_bus));
    }
    {
        let size = mem::size_of::<mpc_bus>();
        let mpc_bus = mpc_bus {
            type_: MP_BUS as u8,
            busid: isa_bus_id,
            bustype: BUS_TYPE_ISA,
        };
        mem.write_obj_at_addr(mpc_bus, base_mp)
            .map_err(|_| Error::WriteMpcBus)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_bus));
    }
    {
        let size = mem::size_of::<mpc_intsrc>();
        let mpc_intsrc = mpc_intsrc {
            type_: MP_LINTSRC as u8,
            irqtype: mp_irq_source_types_mp_INT as u8,
            irqflag: MP_IRQDIR_DEFAULT as u16,
            srcbus: isa_bus_id,
            srcbusirq: 0,
            dstapic: 0,
            dstirq: 0,
        };
        mem.write_obj_at_addr(mpc_intsrc, base_mp)
            .map_err(|_| Error::WriteMpcIntsrc)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_intsrc));
    }
    let sci_irq = super::X86_64_SCI_IRQ as u8;
    // Per kvm_setup_default_irq_routing() in kernel
    for i in (0..sci_irq).chain(std::iter::once(devices::cmos::RTC_IRQ)) {
        let size = mem::size_of::<mpc_intsrc>();
        let mpc_intsrc = mpc_intsrc {
            type_: MP_INTSRC as u8,
            irqtype: mp_irq_source_types_mp_INT as u8,
            irqflag: MP_IRQDIR_DEFAULT as u16,
            srcbus: isa_bus_id,
            srcbusirq: i,
            dstapic: ioapicid,
            dstirq: i,
        };
        mem.write_obj_at_addr(mpc_intsrc, base_mp)
            .map_err(|_| Error::WriteMpcIntsrc)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_intsrc));
    }
    // Insert SCI interrupt before PCI interrupts. Set the SCI interrupt
    // to be the default trigger/polarity of PCI bus, which is level/low.
    // This setting can be changed in future if necessary.
    {
        let size = mem::size_of::<mpc_intsrc>();
        let mpc_intsrc = mpc_intsrc {
            type_: MP_INTSRC as u8,
            irqtype: mp_irq_source_types_mp_INT as u8,
            irqflag: (MP_IRQDIR_HIGH | MP_LEVEL_TRIGGER) as u16,
            srcbus: isa_bus_id,
            srcbusirq: sci_irq,
            dstapic: ioapicid,
            dstirq: sci_irq,
        };
        mem.write_obj_at_addr(mpc_intsrc, base_mp)
            .map_err(|_| Error::WriteMpcIntsrc)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_intsrc));
    }

    // Insert PCI interrupts after platform IRQs.
    for (address, irq_num, irq_pin) in pci_irqs.iter() {
        let size = mem::size_of::<mpc_intsrc>();
        let mpc_intsrc = mpc_intsrc {
            type_: MP_INTSRC as u8,
            irqtype: mp_irq_source_types_mp_INT as u8,
            irqflag: MP_IRQDIR_DEFAULT as u16,
            srcbus: address.bus,
            srcbusirq: address.dev << 2 | irq_pin.to_mask() as u8,
            dstapic: ioapicid,
            dstirq: u8::try_from(*irq_num).map_err(|_| Error::WriteMpcIntsrc)?,
        };
        mem.write_obj_at_addr(mpc_intsrc, base_mp)
            .map_err(|_| Error::WriteMpcIntsrc)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_intsrc));
    }

    let starting_isa_irq_num = pci_irqs
        .iter()
        .map(|(_, irq_num, _)| irq_num + 1)
        .fold(super::X86_64_IRQ_BASE, u32::max) as u8;

    // Finally insert ISA interrupts.
    for i in starting_isa_irq_num..16 {
        let size = mem::size_of::<mpc_intsrc>();
        let mpc_intsrc = mpc_intsrc {
            type_: MP_INTSRC as u8,
            irqtype: mp_irq_source_types_mp_INT as u8,
            irqflag: MP_IRQDIR_DEFAULT as u16,
            srcbus: isa_bus_id,
            srcbusirq: i,
            dstapic: ioapicid,
            dstirq: i,
        };
        mem.write_obj_at_addr(mpc_intsrc, base_mp)
            .map_err(|_| Error::WriteMpcIntsrc)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_intsrc));
    }
    {
        let size = mem::size_of::<mpc_lintsrc>();
        let mpc_lintsrc = mpc_lintsrc {
            type_: MP_LINTSRC as u8,
            irqtype: mp_irq_source_types_mp_ExtINT as u8,
            irqflag: MP_IRQDIR_DEFAULT as u16,
            srcbusid: isa_bus_id,
            srcbusirq: 0,
            destapic: 0,
            destapiclint: 0,
        };
        mem.write_obj_at_addr(mpc_lintsrc, base_mp)
            .map_err(|_| Error::WriteMpcLintsrc)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_lintsrc));
    }
    {
        let size = mem::size_of::<mpc_lintsrc>();
        let mpc_lintsrc = mpc_lintsrc {
            type_: MP_LINTSRC as u8,
            irqtype: mp_irq_source_types_mp_NMI as u8,
            irqflag: MP_IRQDIR_DEFAULT as u16,
            srcbusid: isa_bus_id,
            srcbusirq: 0,
            destapic: 0xFF, // Per SeaBIOS
            destapiclint: 1,
        };
        mem.write_obj_at_addr(mpc_lintsrc, base_mp)
            .map_err(|_| Error::WriteMpcLintsrc)?;
        base_mp = base_mp.unchecked_add(size as u64);
        checksum = checksum.wrapping_add(compute_checksum(&mpc_lintsrc));
    }

    // At this point we know the size of the mp_table.
    let table_end = base_mp;

    {
        let mut mpc_table = mpc_table {
            signature: MPC_SIGNATURE,
            length: table_end.offset_from(table_base) as u16,
            spec: MPC_SPEC,
            oem: MPC_OEM,
            productid: MPC_PRODUCT_ID,
            lapic: APIC_DEFAULT_PHYS_BASE,
            ..Default::default()
        };
        checksum = checksum.wrapping_add(compute_checksum(&mpc_table));
        mpc_table.checksum = (!checksum).wrapping_add(1) as i8;
        mem.write_obj_at_addr(mpc_table, table_base)
            .map_err(|_| Error::WriteMpcTable)?;
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use base::pagesize;

    use super::*;

    /// Returns the MP table size for `num_cpus` rounded up to a page boundary; a size that is
    /// already page aligned still grows by one full page.
    fn compute_page_aligned_mp_size(num_cpus: u8) -> u64 {
        let table_bytes = compute_mp_size(num_cpus);
        let page_bytes = pagesize();
        let remainder = table_bytes % page_bytes;
        (table_bytes + page_bytes - remainder) as u64
    }

    /// Maps an MP configuration table entry type to the size of that entry in bytes.
    fn table_entry_size(type_: u8) -> usize {
        let kind = u32::from(type_);
        if kind == MP_PROCESSOR {
            mem::size_of::<mpc_cpu>()
        } else if kind == MP_BUS {
            mem::size_of::<mpc_bus>()
        } else if kind == MP_IOAPIC {
            mem::size_of::<mpc_ioapic>()
        } else if kind == MP_INTSRC {
            mem::size_of::<mpc_intsrc>()
        } else if kind == MP_LINTSRC {
            mem::size_of::<mpc_lintsrc>()
        } else {
            panic!("unrecognized mpc table entry type: {}", type_)
        }
    }

    /// Creates guest memory made of a single region of `len` bytes starting at `MPTABLE_START`.
    fn guest_memory_at_mptable(len: u64) -> GuestMemory {
        GuestMemory::new(&[(GuestAddress(MPTABLE_START), len)]).unwrap()
    }

    #[test]
    fn bounds_check() {
        let num_cpus = 4;
        let guest_mem = guest_memory_at_mptable(compute_page_aligned_mp_size(num_cpus));

        setup_mptable(&guest_mem, num_cpus, &[]).unwrap();
    }

    #[test]
    fn bounds_check_fails() {
        // A single 4 KiB region is too small for a 255-CPU table.
        let num_cpus = 255;
        let guest_mem = guest_memory_at_mptable(0x1000);

        assert!(setup_mptable(&guest_mem, num_cpus, &[]).is_err());
    }

    #[test]
    fn mpf_intel_checksum() {
        let num_cpus = 1;
        let guest_mem = guest_memory_at_mptable(compute_page_aligned_mp_size(num_cpus));

        setup_mptable(&guest_mem, num_cpus, &[]).unwrap();

        let floating_ptr: mpf_intel = guest_mem
            .read_obj_from_addr(GuestAddress(MPTABLE_START))
            .unwrap();

        assert_eq!(
            mpf_intel_compute_checksum(&floating_ptr),
            floating_ptr.checksum
        );
    }

    #[test]
    fn mpc_table_checksum() {
        let num_cpus = 4;
        let guest_mem = guest_memory_at_mptable(compute_page_aligned_mp_size(num_cpus));

        setup_mptable(&guest_mem, num_cpus, &[]).unwrap();

        let floating_ptr: mpf_intel = guest_mem
            .read_obj_from_addr(GuestAddress(MPTABLE_START))
            .unwrap();
        let table_addr = GuestAddress(floating_ptr.physptr as u64);
        let header: mpc_table = guest_mem.read_obj_from_addr(table_addr).unwrap();

        // Header plus all entries must sum to zero modulo 256.
        let mut table_bytes = vec![0u8; header.length as usize];
        guest_mem
            .read_at_addr(&mut table_bytes[..], table_addr)
            .unwrap();
        let total = table_bytes
            .iter()
            .fold(0u8, |acc, &byte| acc.wrapping_add(byte));

        assert_eq!(total, 0);
    }

    #[test]
    fn cpu_entry_count() {
        const MAX_CPUS: u8 = 0xff;
        let guest_mem = guest_memory_at_mptable(compute_page_aligned_mp_size(MAX_CPUS));

        for expected_cpus in 0..MAX_CPUS {
            setup_mptable(&guest_mem, expected_cpus, &[]).unwrap();

            let floating_ptr: mpf_intel = guest_mem
                .read_obj_from_addr(GuestAddress(MPTABLE_START))
                .unwrap();
            let table_addr = GuestAddress(floating_ptr.physptr as u64);
            let header: mpc_table = guest_mem.read_obj_from_addr(table_addr).unwrap();
            let table_end = table_addr.checked_add(header.length as u64).unwrap();

            // Walk the entries that follow the header, counting the processor entries.
            let mut cursor = table_addr
                .checked_add(mem::size_of::<mpc_table>() as u64)
                .unwrap();
            let mut cpu_entries: u8 = 0;
            while cursor < table_end {
                let entry_type: u8 = guest_mem.read_obj_from_addr(cursor).unwrap();
                let entry_len = table_entry_size(entry_type) as u64;
                cursor = cursor.checked_add(entry_len).unwrap();
                assert!(cursor <= table_end);
                if u32::from(entry_type) == MP_PROCESSOR {
                    cpu_entries += 1;
                }
            }
            assert_eq!(cpu_entries, expected_cpus);
        }
    }
}