// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Handles operations using platform Time Stamp Counter (TSC).

// TODO(b/213149158): Remove after uses are added.
#![allow(dead_code)]

use std::arch::x86_64::_rdtsc;
use std::sync::LazyLock;

use anyhow::anyhow;
use anyhow::Result;
use base::debug;
use base::error;

mod calibrate;
mod cpuid;
mod grouping;

pub use calibrate::*;
pub use cpuid::*;

/// Reads the host Time Stamp Counter through the `_rdtsc` intrinsic.
///
/// Wrapping the intrinsic in a safe function lets the read be handed around as a plain
/// `fn() -> u64`.
fn rdtsc_safe() -> u64 {
    // SAFETY:
    // Safe because _rdtsc takes no arguments, so there is no caller-supplied input that could
    // violate an invariant.
    unsafe { _rdtsc() }
}

// Lazily-initialized, process-wide host TSC state. Calibration runs on first access; the outcome
// is cached, with a failed calibration stored as `None` so it is not retried.
static TSC_STATE: LazyLock<Option<TscState>> = LazyLock::new(|| {
    calibrate_tsc_state()
        .inspect(|calibrated| {
            debug!("Using calibrated tsc frequency: {} Hz", calibrated.frequency);
            // NOTE(review): `offset` is the whole `offsets` element (the same element that is
            // accessed with `.1` in this file), so `{:?}` prints the full entry rather than only
            // the offset value -- confirm that is intended.
            for (core, offset) in calibrated.offsets.iter().enumerate() {
                debug!("Core {} has tsc offset of {:?} ns", core, offset);
            }
        })
        .inspect_err(|e| {
            error!("Failed to calibrate tsc state: {:#}", e);
        })
        .ok()
});

/// Returns the frequency of the host TSC, in Hz. Calibration only happens once.
///
/// # Errors
///
/// Returns an error if the one-time TSC calibration failed.
pub fn tsc_frequency() -> Result<u64> {
    TSC_STATE
        .as_ref()
        .map(|state| state.frequency)
        // Build the error lazily so the success path never constructs an `anyhow::Error`.
        .ok_or_else(|| anyhow!("TSC calibration failed"))
}

/// Returns a clone of the state of the host TSCs. Calibration only happens once.
///
/// # Errors
///
/// Returns an error if the one-time TSC calibration failed.
pub fn tsc_state() -> Result<TscState> {
    TSC_STATE
        .as_ref()
        .cloned()
        // Build the error lazily so the success path never constructs an `anyhow::Error`.
        .ok_or_else(|| anyhow!("TSC calibration failed"))
}

/// Per-vcpu settings used to hide out-of-sync host TSCs from a guest.
///
/// Both vectors are indexed by vcpu id.
#[derive(Default, Debug)]
pub struct TscSyncMitigations {
    /// Vec of per-vcpu affinities to apply to each vcpu thread. If None, no affinity should be
    /// applied.
    pub affinities: Vec<Option<Vec<usize>>>,
    /// Vec of TSC offsets to set on each vcpu. If None, no offset should be applied.
    pub offsets: Vec<Option<u64>>,
}

impl TscSyncMitigations {
    /// Creates a set of mitigations for `num_vcpus` vcpus in which no affinity and no TSC offset
    /// is set for any vcpu.
    fn new(num_vcpus: usize) -> Self {
        TscSyncMitigations {
            affinities: vec![None; num_vcpus],
            offsets: vec![None; num_vcpus],
        }
    }

    /// Returns a copy of the affinity to apply to vcpu `cpu_id`.
    ///
    /// Returns `None` when no affinity should be applied, which includes the case where `cpu_id`
    /// is not a vcpu covered by these mitigations.
    pub fn get_vcpu_affinity(&self, cpu_id: usize) -> Option<Vec<usize>> {
        // `?` turns an out-of-range `cpu_id` into `None` instead of a panic.
        self.affinities.get(cpu_id)?.clone()
    }

    /// Returns the TSC offset to set on vcpu `cpu_id`.
    ///
    /// Returns `None` when no offset should be applied, which includes the case where `cpu_id`
    /// is not a vcpu covered by these mitigations.
    pub fn get_vcpu_tsc_offset(&self, cpu_id: usize) -> Option<u64> {
        // `?` turns an out-of-range `cpu_id` into `None` instead of a panic.
        *self.offsets.get(cpu_id)?
    }
}

/// Given the state of the host TSCs in `tsc_state`, and the number of vcpus that are intended to
/// be run, return a set of affinities and TSC offsets to apply to those vcpus.
///
/// This is a thin wrapper that delegates to `tsc_sync_mitigations_inner`, supplying
/// `rdtsc_safe` as the function used to read the host TSC.
pub fn get_tsc_sync_mitigations(tsc_state: &TscState, num_vcpus: usize) -> TscSyncMitigations {
    // The TSC reader is passed in as a function so the inner implementation can also be driven
    // with a fake reader.
    tsc_sync_mitigations_inner(tsc_state, num_vcpus, rdtsc_safe)
}

/// Computes the per-vcpu affinities and TSC offsets for `num_vcpus` vcpus.
///
/// `rdtsc` is the function used to sample the host TSC; taking it as a parameter allows a
/// deterministic reader to be substituted.
///
/// The returned value always has `num_vcpus` entries. Three cases are handled:
/// * There is exactly one core group: no affinity or offset is set.
/// * The largest core group has at least `num_vcpus` cores: every vcpu gets an affinity covering
///   that whole group and no offsets are set.
/// * Otherwise: vcpu `i` is pinned to core `i % num_cores` and gets a TSC offset that compensates
///   for that core's offset.
///
/// # Panics
///
/// Panics with a remainder-by-zero if the per-core pinning path is reached while
/// `tsc_state.offsets` is empty.
fn tsc_sync_mitigations_inner(
    tsc_state: &TscState,
    num_vcpus: usize,
    rdtsc: fn() -> u64,
) -> TscSyncMitigations {
    let mut mitigations = TscSyncMitigations::new(num_vcpus);
    // If there's only one core grouping that means all the TSCs are in sync and no mitigations are
    // needed.
    if tsc_state.core_grouping.size() == 1 {
        return mitigations;
    }

    let largest_group = tsc_state.core_grouping.largest_group();
    let num_cores = tsc_state.offsets.len();

    // If the largest core group has at least as many cores as there are vcpus, just pin all vcpus
    // to that core group, and no need to set offsets.
    if largest_group.cores.len() >= num_vcpus {
        let affinity: Vec<usize> = largest_group.cores.iter().map(|core| core.core).collect();
        mitigations.affinities.fill(Some(affinity));
    } else {
        // Otherwise, we pin each vcpu to a core and set its offset to compensate.
        //
        // The guest TSC value is calculated like so:
        //   host_tsc + tsc_offset = guest_tsc
        // If we assume that each host core has its own error (core_offset), then it's more
        // like this:
        //   host_tsc + core_offset + tsc_offset = guest_tsc
        // We want guest_tsc to be 0 at boot, so the formula is this:
        //   host_tsc + core_offset + tsc_offset = 0
        // and then you subtract host_tsc and core_offset from both sides and you get:
        //   tsc_offset = 0 - host_tsc - core_offset
        let host_tsc_now = rdtsc();
        // The `0 - host_tsc` part is the same for every vcpu, so compute it once.
        let guest_zero_offset = 0u64.wrapping_sub(host_tsc_now);

        let per_vcpu = mitigations
            .affinities
            .iter_mut()
            .zip(mitigations.offsets.iter_mut())
            .enumerate();
        for (vcpu, (affinity, offset)) in per_vcpu {
            // This handles the case where num_vcpus > num_cores, even though we try to avoid that
            // in practice.
            let pinned_core = vcpu % num_cores;
            *affinity = Some(vec![pinned_core]);

            // Subtracting core_offset is done by adding its negation. The negated value is
            // narrowed to i64 (keeping the low 64 bits) and added with two's-complement wrapping.
            let core_offset = tsc_state.offsets[pinned_core].1;
            let negated_core_offset = core_offset.wrapping_neg() as i64;
            *offset = Some(guest_zero_offset.wrapping_add_signed(negated_core_offset));
        }
    }

    mitigations
}

// Unit tests for `tsc_sync_mitigations_inner`. Each test builds a `TscState` from hand-written
// per-core offsets, checks the resulting core grouping, and then checks the affinities and TSC
// offsets produced when the host TSC read is replaced by a fake that returns `u64::MAX`.
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::tsc::grouping::CoreGroup;
    use crate::tsc::grouping::CoreGrouping;
    use crate::tsc::grouping::CoreOffset;

    /// Four cores that each end up in their own group, with four vcpus: every vcpu is expected to
    /// be pinned to the core with the same index and to receive a compensating TSC offset.
    #[test]
    fn test_sync_mitigation_set_offsets() {
        let offsets = vec![(0, 0), (1, 1000), (2, -1000), (3, 2000)];
        // frequency of 1GHz means 20 nanos is 20 ticks
        let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
            .expect("TscState::new should not fail for this test");

        // Sanity-check the grouping first: four single-core groups.
        assert_eq!(
            state.core_grouping,
            CoreGrouping::new(vec![
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 2,
                        offset: -1000
                    }]
                },
                CoreGroup {
                    cores: vec![CoreOffset { core: 0, offset: 0 }]
                },
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 1,
                        offset: 1000
                    }]
                },
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 3,
                        offset: 2000
                    }]
                },
            ])
            .expect("CoreGrouping::new should not fail here")
        );

        // Deterministic stand-in for the host TSC read; with `u64::MAX` the wrapping
        // `0 - host_tsc` term is 1.
        fn fake_rdtsc() -> u64 {
            u64::MAX
        }

        let mitigations = tsc_sync_mitigations_inner(&state, 4, fake_rdtsc);

        // core offsets are:
        //  - core 0: has an offset of 0, so TSC offset = 0 - u64::MAX - 0 = 1
        //  - core 1: has an offset of 1000, so TSC offset = 0 - u64::MAX - 1000 = -999
        //  - core 2: has an offset of -1000, so TSC offset = 0 - u64::MAX + 1000 = 1001
        //  - core 3: has an offset of 2000, so TSC offset = 0 - u64::MAX - 2000 = -1999
        let expected = [1, 1u64.wrapping_sub(1000), 1001u64, 1u64.wrapping_sub(2000)];

        for (i, expect) in expected.iter().enumerate() {
            assert_eq!(
                mitigations
                    .get_vcpu_tsc_offset(i)
                    .unwrap_or_else(|| panic!("core {} should have an offset of {}", i, expect)),
                *expect
            );

            assert_eq!(
                mitigations
                    .get_vcpu_affinity(i)
                    .unwrap_or_else(|| panic!("core {} should have an affinity of [{}]", i, i)),
                vec![i]
            );
        }
    }

    /// Eight cores of which four share the same offset, with four vcpus: every vcpu is expected
    /// to get the four-core group as its affinity and no TSC offset.
    #[test]
    fn test_sync_mitigation_large_group() {
        // 8 cores, and cores 1,3,5,7 are in-sync at offset -1000
        let offsets = vec![
            (0, 0),
            (1, -1000),
            (2, 1000),
            (3, -1000),
            (4, 2000),
            (5, -1000),
            (6, 3000),
            (7, -1000),
        ];
        // frequency of 1GHz means 20 nanos is 20 ticks
        let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
            .expect("TscState::new should not fail for this test");

        // Sanity-check the grouping first: one four-core group plus four single-core groups.
        assert_eq!(
            state.core_grouping,
            CoreGrouping::new(vec![
                CoreGroup {
                    cores: vec![
                        CoreOffset {
                            core: 1,
                            offset: -1000
                        },
                        CoreOffset {
                            core: 3,
                            offset: -1000
                        },
                        CoreOffset {
                            core: 5,
                            offset: -1000
                        },
                        CoreOffset {
                            core: 7,
                            offset: -1000
                        }
                    ]
                },
                CoreGroup {
                    cores: vec![CoreOffset { core: 0, offset: 0 }]
                },
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 2,
                        offset: 1000
                    }]
                },
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 4,
                        offset: 2000
                    }]
                },
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 6,
                        offset: 3000
                    }]
                },
            ])
            .expect("CoreGrouping::new should not fail here")
        );

        // Deterministic stand-in for the host TSC read.
        fn fake_rdtsc() -> u64 {
            u64::MAX
        }

        let num_vcpus = 4;
        let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);

        let expected_affinity = vec![1, 3, 5, 7];
        for i in 0..num_vcpus {
            assert_eq!(
                mitigations.get_vcpu_affinity(i).unwrap_or_else(|| panic!(
                    "core {} should have an affinity of {:?}",
                    i, expected_affinity
                )),
                expected_affinity
            );
            assert_eq!(mitigations.get_vcpu_tsc_offset(i), None);
        }
    }

    /// Four cores with eight vcpus: the largest group (two cores) is smaller than the vcpu count,
    /// so each vcpu `i` is expected to be pinned to core `i % 4` with that core's TSC offset.
    #[test]
    fn more_vcpus_than_cores() {
        // 4 cores, two can be grouped but it doesn't matter because we'll have more vcpus than
        // the largest group.
        let offsets = vec![(0, 0), (1, 0), (2, 1000), (3, 2000)];
        // frequency of 1GHz means 20 nanos is 20 ticks
        let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
            .expect("TscState::new should not fail for this test");

        // Sanity-check the grouping first: one two-core group plus two single-core groups.
        assert_eq!(
            state.core_grouping,
            CoreGrouping::new(vec![
                CoreGroup {
                    cores: vec![
                        CoreOffset { core: 0, offset: 0 },
                        CoreOffset { core: 1, offset: 0 }
                    ]
                },
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 2,
                        offset: 1000
                    }]
                },
                CoreGroup {
                    cores: vec![CoreOffset {
                        core: 3,
                        offset: 2000
                    }]
                },
            ])
            .expect("CoreGrouping::new should not fail here")
        );

        // Deterministic stand-in for the host TSC read.
        fn fake_rdtsc() -> u64 {
            u64::MAX
        }

        // 8 vcpus, more than we have cores
        let num_vcpus = 8;
        let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);
        // Indexed by core: 0 - u64::MAX wraps to 1, then each core's offset is subtracted.
        let expected_offsets = [1, 1, 1u64.wrapping_sub(1000), 1u64.wrapping_sub(2000)];

        for i in 0..num_vcpus {
            assert_eq!(
                mitigations.get_vcpu_affinity(i).unwrap_or_else(|| panic!(
                    "core {} should have an affinity of {:?}",
                    i,
                    i % 4
                )),
                // expected affinity is the vcpu modulo 4
                vec![i % 4]
            );
            assert_eq!(
                mitigations.get_vcpu_tsc_offset(i).unwrap_or_else(|| panic!(
                    "core {} should have an offset of {:?}",
                    i,
                    expected_offsets[i % 4]
                )),
                expected_offsets[i % 4]
            );
        }
    }
}