devices/tsc/
calibrate.rs

1// Copyright 2022 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::collections::HashSet;
6use std::iter::FromIterator;
7use std::time::Duration;
8use std::time::Instant;
9
10use anyhow::anyhow;
11use anyhow::Context;
12use anyhow::Result;
13use base::set_cpu_affinity;
14use base::warn;
15use remain::sorted;
16use thiserror::Error;
17
18use super::grouping::*;
19use super::rdtsc_safe;
20
// Number of start moments and of end moments gathered while calibrating the TSC frequency, and
// the number of moments gathered on each core while measuring its TSC offset.
const TSC_CALIBRATION_SAMPLES: usize = 10;
// How long the frequency calibration sleeps between gathering its start and end moments.
const TSC_CALIBRATION_DURATION: Duration = Duration::from_millis(100);
// Samples that are more than 3 standard deviations away from the median are discarded.
const TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT: f64 = 3.0;
// We consider two TSC cores to be in sync if they are within 2 microseconds of each other.
// An optimal context switch takes about 1-3 microseconds.
const TSC_OFFSET_GROUPING_THRESHOLD: Duration = Duration::from_micros(2);
28
/// Errors reported by the per-core TSC calibration and offset measurement routines.
#[sorted]
#[derive(Error, Debug)]
pub enum TscCalibrationError {
    /// Received `err` when setting the cpu affinity to `core`
    #[error("failed to set thread cpu affinity to core {core}: {err}")]
    SetCpuAffinityError { core: usize, err: base::Error },
}
36
37/// Get the standard deviation of a `Vec<T>`.
38pub fn standard_deviation<T: num_traits::ToPrimitive + num_traits::Num + Copy>(items: &[T]) -> f64 {
39    let sum: T = items.iter().fold(T::zero(), |acc: T, elem| acc + *elem);
40    let count = items.len();
41
42    let mean: f64 = sum.to_f64().unwrap_or(0.0) / count as f64;
43
44    let variance = items
45        .iter()
46        .map(|x| {
47            let diff = mean - (x.to_f64().unwrap_or(0.0));
48            diff * diff
49        })
50        .sum::<f64>();
51    (variance / count as f64).sqrt()
52}
53
54fn sort_and_get_bounds(items: &mut [i128], stdev_limit: f64) -> (f64, f64) {
55    items.sort_unstable();
56    let median = items[items.len() / 2];
57
58    let standard_deviation = standard_deviation(items);
59    let lower_bound = median as f64 - stdev_limit * standard_deviation;
60    let upper_bound = median as f64 + stdev_limit * standard_deviation;
61    (lower_bound, upper_bound)
62}
63
/// Represents the host monotonic time and the TSC value at a single moment in time.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct TscMoment {
    /// Host monotonic clock reading, taken just before `tsc` was read.
    time: Instant,
    /// Value returned by the TSC reader for this moment.
    tsc: u64,
}

impl TscMoment {
    /// Captures the current host time and then reads the TSC through `rdtsc`.
    fn now(rdtsc: fn() -> u64) -> Self {
        TscMoment {
            time: Instant::now(),
            tsc: rdtsc(),
        }
    }

    /// Returns the two moments as `(earlier, later)` according to their host time, so callers do
    /// not have to care in which order the moments were passed.
    fn in_time_order<'a>(
        a: &'a TscMoment,
        b: &'a TscMoment,
    ) -> (&'a TscMoment, &'a TscMoment) {
        if a.time > b.time {
            (b, a)
        } else {
            (a, b)
        }
    }

    /// Measure the tsc frequency using two `TscMoment`s.
    ///
    /// The result is in TSC ticks per second. The moments may be passed in either order. If both
    /// moments carry the same host time no frequency can be derived, and `0` is returned instead
    /// of dividing by zero.
    fn measure_tsc_frequency(first: &TscMoment, second: &TscMoment) -> i128 {
        let (earlier, later) = Self::in_time_order(first, second);

        let elapsed_nanos = (later.time - earlier.time).as_nanos() as i128;
        if elapsed_nanos == 0 {
            return 0;
        }

        let tsc_delta = later.tsc as i128 - earlier.tsc as i128;
        tsc_delta * 1_000_000_000i128 / elapsed_nanos
    }

    /// Measure the tsc offset using two `TscMoment`s and the TSC frequency.
    ///
    /// The result is the number of TSC ticks by which the observed TSC difference between the two
    /// moments exceeds the difference predicted from the elapsed host time at `tsc_frequency`
    /// ticks per second. The moments may be passed in either order.
    fn measure_tsc_offset(first: &TscMoment, second: &TscMoment, tsc_frequency: u64) -> i128 {
        let (earlier, later) = Self::in_time_order(first, second);

        let tsc_delta = later.tsc as i128 - earlier.tsc as i128;
        let time_delta_as_tsc_ticks =
            (later.time - earlier.time).as_nanos() * tsc_frequency as u128 / 1_000_000_000u128;
        tsc_delta - time_delta_as_tsc_ticks as i128
    }
}
109
/// Result of a TSC calibration: the TSC frequency, the per-core TSC offsets, and the grouping of
/// cores derived from those offsets.
#[derive(Default, Debug, Clone)]
pub struct TscState {
    /// TSC frequency. When produced by the calibration in this file it is in ticks per second.
    pub frequency: u64,
    /// `(core, offset)` pairs. When produced by the calibration in this file the offset is in
    /// nanoseconds, and cores whose affinity could not be set are absent.
    pub offsets: Vec<(usize, i128)>,
    /// Grouping of cores computed from `offsets` by `group_core_offsets`.
    pub core_grouping: CoreGrouping,
}
116
117impl TscState {
118    pub(crate) fn new(
119        tsc_frequency: u64,
120        offsets: Vec<(usize, i128)>,
121        in_sync_threshold: Duration,
122    ) -> Result<Self> {
123        let core_grouping = group_core_offsets(&offsets, in_sync_threshold, tsc_frequency)
124            .context("Failed to group cores by their TSC offsets")?;
125        Ok(TscState {
126            frequency: tsc_frequency,
127            offsets,
128            core_grouping,
129        })
130    }
131}
132
133/// Calibrate the TSC frequency of `core`.
134///
135/// This function first pins itself to `core`, generates `num_samples` start `TscMoment`s, sleeps
136/// for `calibration_duration`, and then generates `num_samples` end `TscMoment`s. For each pair
137/// of start and end moments, a TSC frequency value is calculated. Any frequencies that are
138/// outside of `stddev_limit` standard deviations from the median offset are discarded, because
139/// they may represent an interrupt that occurred while a TscMoment was generated. The remaining
140/// non-discarded frequencies are then averaged. The function returns the TSC frequency average, as
141/// well as a Vec of `TscMoment`s, which are all of the end moments that were associated with at
142/// least one non-discarded frequency.
143///
144/// # Arguments
145/// * `core` - Core that this function should run on.
146/// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
147/// * `num_samples` - Number of start and end `TscMoment`s to generate.
148/// * `calibration_duration` - How long to sleep in between gathering start and end moments.
149/// * `stdev_limit` - Number of standard deviations outside of which frequencies are discarded.
150fn calibrate_tsc_frequency(
151    rdtsc: fn() -> u64,
152    core: usize,
153    num_samples: usize,
154    calibration_duration: Duration,
155    stdev_limit: f64,
156) -> std::result::Result<(i128, Vec<TscMoment>), TscCalibrationError> {
157    set_cpu_affinity(vec![core])
158        .map_err(|e| TscCalibrationError::SetCpuAffinityError { core, err: e })?;
159
160    let starts: Vec<TscMoment> = (0..num_samples).map(|_| TscMoment::now(rdtsc)).collect();
161
162    std::thread::sleep(calibration_duration);
163
164    let ends: Vec<TscMoment> = (0..num_samples).map(|_| TscMoment::now(rdtsc)).collect();
165
166    let mut freqs = Vec::with_capacity(num_samples * num_samples);
167    for start in &starts {
168        for end in &ends {
169            freqs.push(TscMoment::measure_tsc_frequency(start, end))
170        }
171    }
172
173    let (lower_bound, upper_bound) = sort_and_get_bounds(&mut freqs, stdev_limit);
174
175    let mut good_samples: Vec<i128> = Vec::with_capacity(num_samples * num_samples);
176    let mut good_end_moments: HashSet<TscMoment> = HashSet::new();
177    for i in 0..num_samples {
178        for j in 0..num_samples {
179            let freq = freqs[i * num_samples + j];
180
181            if lower_bound < (freq as f64) && (freq as f64) < upper_bound {
182                good_end_moments.insert(ends[j]);
183                good_samples.push(freq);
184            }
185        }
186    }
187
188    Ok((
189        good_samples.iter().sum::<i128>() / good_samples.len() as i128,
190        Vec::from_iter(good_end_moments),
191    ))
192}
193
194/// Measure the TSC offset for `core` from core 0 where `reference_moments` were gathered.
195///
196/// This function first pins itself to `core`, then generates `num_samples` `TscMoment`s for this
197/// core, and then measures the TSC offset between those moments and all `reference_moments`. Any
198/// moments that are outside of `stddev_limit` standard deviations from the median offset are
199/// discarded, because they may represent an interrupt that occurred while a TscMoment was
200/// generated. The remaining offsets are averaged and returned as nanoseconds.
201///
202/// # Arguments
203/// * `core` - Core that this function should run on.
204/// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
205/// * `tsc_frequency` - TSC frequency measured from core 0.
206/// * `reference_moments` - `TscMoment`s gathered from core 0.
207/// * `num_samples` - Number of `TscMoment`s to generate on this thread for measuring the offset.
208/// * `stdev_limit` - Number of standard deviations outside of which offsets are discarded.
209fn measure_tsc_offset(
210    core: usize,
211    rdtsc: fn() -> u64,
212    tsc_frequency: u64,
213    reference_moments: Vec<TscMoment>,
214    num_samples: usize,
215    stdev_limit: f64,
216) -> std::result::Result<i128, TscCalibrationError> {
217    set_cpu_affinity(vec![core])
218        .map_err(|e| TscCalibrationError::SetCpuAffinityError { core, err: e })?;
219
220    let mut diffs: Vec<i128> = Vec::with_capacity(num_samples);
221
222    for _ in 0..num_samples {
223        let now = TscMoment::now(rdtsc);
224        for reference_moment in &reference_moments {
225            diffs.push(TscMoment::measure_tsc_offset(
226                reference_moment,
227                &now,
228                tsc_frequency,
229            ));
230        }
231    }
232
233    let (lower_bound, upper_bound) = sort_and_get_bounds(&mut diffs, stdev_limit);
234
235    let mut good_samples: Vec<i128> = Vec::with_capacity(num_samples);
236    for diff in &diffs {
237        if lower_bound < (*diff as f64) && (*diff as f64) < upper_bound {
238            good_samples.push(*diff);
239        }
240    }
241
242    let average_diff = good_samples.iter().sum::<i128>() / good_samples.len() as i128;
243
244    // Convert the diff to nanoseconds using the tsc_frequency
245    Ok(average_diff * 1_000_000_000 / tsc_frequency as i128)
246}
247
248/// Calibrate the TSC state.
249///
250/// This function first runs a TSC frequency calibration thread for 100ms, which is pinned to
251/// core0. The TSC calibration thread returns both the calibrated frequency, as well as a Vec of
252/// TscMoment objects which were validated to be accurate (meaning it's unlikely an interrupt
253/// occurred between moment's `time` and `tsc` values). This function then runs a tsc offset
254/// measurement thread for each core, which takes the TSC frequency and the Vec of TscMoments and
255/// measures whether or not the TSC values for that core are offset from core 0, and by how much.
256/// The frequency and the per-core offsets are returned as a TscState.
257pub fn calibrate_tsc_state() -> Result<TscState> {
258    calibrate_tsc_state_inner(
259        rdtsc_safe,
260        (0..base::number_of_logical_cores().context("Failed to get number of logical cores")?)
261            .collect(),
262    )
263}
264
265/// Actually calibrate the TSC state.
266///
267/// This function takes a customizable version of rdtsc and a specific set of cores to calibrate,
268/// which is helpful for testing calibration logic and error handling.
269///
270/// # Arguments
271///
272/// * `rdtsc` - Function for reading the TSC value, usually just runs RDTSC instruction.
273/// * `cores` - Cores to measure the TSC offset of.
274fn calibrate_tsc_state_inner(rdtsc: fn() -> u64, cores: Vec<usize>) -> Result<TscState> {
275    // For loops can't return values unfortunately
276    let mut calibration_contents: Option<(u64, Vec<TscMoment>)> = None;
277    for core in &cores {
278        // Copy the value of core to a moveable variable now.
279        let moved_core = *core;
280        let handle = std::thread::Builder::new()
281            .name(format!("tsc_calibration_core_{core}").to_string())
282            .spawn(move || {
283                calibrate_tsc_frequency(
284                    rdtsc,
285                    moved_core,
286                    TSC_CALIBRATION_SAMPLES,
287                    TSC_CALIBRATION_DURATION,
288                    TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT,
289                )
290            })
291            .map_err(|e| {
292                anyhow!(
293                    "TSC frequency calibration thread for core {} failed: {:?}",
294                    core,
295                    e
296                )
297            })?;
298
299        match handle.join() {
300            Ok(calibrate_result) => match calibrate_result {
301                Ok((freq, reference_moments)) => {
302                    if freq <= 0 {
303                        warn!(
304                            "TSC calibration on core {} resulted in TSC frequency of {} Hz, \
305                    trying on another core.",
306                            core, freq
307                        );
308                        continue;
309                    };
310                    calibration_contents = Some((freq as u64, reference_moments));
311                    break;
312                }
313
314                Err(TscCalibrationError::SetCpuAffinityError { core, err }) => {
315                    // There are several legitimate reasons why it might not be possible for crosvm
316                    // to run on some cores:
317                    //  1. Some cores may be offline.
318                    //  2. On Windows, the process affinity mask may not contain all cores.
319                    //
320                    // We thus just warn in this situation.
321                    warn!(
322                        "Failed to set thread affinity to {} during tsc frequency calibration due \
323                            to {}. This core is probably offline.",
324                        core, err
325                    );
326                }
327            },
328            // thread failed
329            Err(e) => {
330                return Err(anyhow!(
331                    "TSC frequency calibration thread for core {} failed: {:?}",
332                    core,
333                    e
334                ));
335            }
336        };
337    }
338
339    let (freq, reference_moments) =
340        calibration_contents.ok_or(anyhow!("Failed to calibrate TSC frequency on all cores"))?;
341
342    let mut offsets: Vec<(usize, i128)> = Vec::with_capacity(cores.len());
343    for core in cores {
344        let thread_reference_moments = reference_moments.clone();
345        let handle = std::thread::Builder::new()
346            .name(format!("measure_tsc_offset_core_{core}").to_string())
347            .spawn(move || {
348                measure_tsc_offset(
349                    core,
350                    rdtsc,
351                    freq,
352                    thread_reference_moments,
353                    TSC_CALIBRATION_SAMPLES,
354                    TSC_CALIBRATION_STANDARD_DEVIATION_LIMIT,
355                )
356            })
357            .map_err(|e| {
358                anyhow!(
359                    "TSC offset measurement thread for core {} failed: {:?}",
360                    core,
361                    e
362                )
363            })?;
364        let offset = match handle.join() {
365            // thread succeeded
366            Ok(measurement_result) => match measurement_result {
367                Ok(offset) => Some(offset),
368                Err(TscCalibrationError::SetCpuAffinityError { core, err }) => {
369                    // There are several legitimate reasons why it might not be possible for crosvm
370                    // to run on some cores:
371                    //  1. Some cores may be offline.
372                    //  2. On Windows, the process affinity mask may not contain all cores.
373                    //
374                    // We thus just warn in this situation.
375                    warn!(
376                        "Failed to set thread affinity to {} during tsc offset measurement due \
377                        to {}. This core is probably offline.",
378                        core, err
379                    );
380                    None
381                }
382            },
383            // thread failed
384            Err(e) => {
385                return Err(anyhow!(
386                    "TSC offset measurement thread for core {} failed: {:?}",
387                    core,
388                    e
389                ));
390            }
391        };
392
393        if let Some(offset) = offset {
394            offsets.push((core, offset));
395        }
396    }
397
398    TscState::new(freq, offsets, TSC_OFFSET_GROUPING_THRESHOLD)
399}
400
#[cfg(test)]
mod tests {
    use std::arch::x86_64::__rdtscp;
    use std::arch::x86_64::_rdtsc;

    use super::*;

    const ACCEPTABLE_OFFSET_MEASUREMENT_ERROR: i128 = 2_000i128;

    /// Returns the ids `0..n`, where `n` is the number of logical cores reported by the host.
    fn all_logical_cores() -> Vec<usize> {
        (0..base::number_of_logical_cores().expect("number of logical cores should not fail"))
            .collect()
    }

    /// Asserts that every measured offset in `state` is within
    /// `ACCEPTABLE_OFFSET_MEASUREMENT_ERROR` of the value `expected_offset_ns` returns for that
    /// core.
    ///
    /// The offsets are matched by the core id stored in each entry rather than by position,
    /// because cores that could not be pinned are left out of `state.offsets`.
    fn assert_offsets_match(state: &TscState, expected_offset_ns: impl Fn(usize) -> i128) {
        assert!(!state.offsets.is_empty());

        for &(core, offset_ns) in &state.offsets {
            let expected_ns = expected_offset_ns(core);
            assert!(
                offset_ns < expected_ns + ACCEPTABLE_OFFSET_MEASUREMENT_ERROR,
                "core {core}: offset {offset_ns} ns is too far above expected {expected_ns} ns"
            );
            assert!(
                offset_ns > expected_ns - ACCEPTABLE_OFFSET_MEASUREMENT_ERROR,
                "core {core}: offset {offset_ns} ns is too far below expected {expected_ns} ns"
            );
        }
    }

    #[test]
    fn test_handle_offline_core() {
        // This test imitates what would happen if a core is offline, and set_cpu_affinity fails.
        // The calibration should not fail, and the extra core should not appear in the list of
        // offsets.

        let num_cores =
            base::number_of_logical_cores().expect("number of logical cores should not fail");

        let too_many_cores = num_cores + 2;
        let host_state = calibrate_tsc_state_inner(rdtsc_safe, (0..too_many_cores).collect())
            .expect("calibrate tsc state should not fail");

        // First assert that the number of offsets measured is at most num_cores (it might be
        // less if the current host has some offline cores).
        assert!(host_state.offsets.len() <= num_cores);

        for (core, _) in host_state.offsets {
            // Assert that all offsets that we have are for cores 0..num_cores.
            assert!(core < num_cores);
        }
    }

    #[test]
    fn test_frequency_higher_than_u32() {
        // This test is making sure that we're not truncating our TSC frequencies in the case that
        // they are greater than u32::MAX.

        let host_state = calibrate_tsc_state_inner(rdtsc_safe, all_logical_cores())
            .expect("failed to calibrate host freq");

        // We use a static multiplier of 1000 here because the function has to be static (fn).
        // 1000 should work for tsc frequency > 4.2MHz, which should apply to basically any
        // processor. This if statement checks and bails early if that's not the case.
        if host_state.frequency * 1000 < (u32::MAX as u64) {
            return;
        }

        fn rdtsc_frequency_higher_than_u32() -> u64 {
            // SAFETY: trivially safe
            unsafe { _rdtsc() }.wrapping_mul(1000)
        }

        let state =
            calibrate_tsc_state_inner(rdtsc_frequency_higher_than_u32, all_logical_cores())
                .unwrap();

        let expected_freq = host_state.frequency * 1000;
        let margin_of_error = expected_freq / 100;
        assert!(state.frequency < expected_freq + margin_of_error);
        assert!(state.frequency > expected_freq - margin_of_error);
    }

    #[test]
    #[ignore]
    fn test_offset_identification_core_0() {
        fn rdtsc_with_core_0_offset_by_100_000() -> u64 {
            let mut id = 0u32;
            // SAFETY: trivially safe
            let mut value = unsafe { __rdtscp(&mut id as *mut u32) };
            if id == 0 {
                value += 100_000;
            }

            value
        }

        // This test only works if the host has >=2 logical cores.
        let num_cores =
            base::number_of_logical_cores().expect("Failed to get number of logical cores");
        if num_cores < 2 {
            return;
        }

        let state = calibrate_tsc_state_inner(
            rdtsc_with_core_0_offset_by_100_000,
            (0..num_cores).collect(),
        )
        .unwrap();

        // Core 0 is the one with the artificial offset, so every other core is expected to
        // appear behind it by that amount.
        let frequency = state.frequency as i128;
        assert_offsets_match(&state, |core| {
            if core > 0 {
                -100_000i128 * 1_000_000_000i128 / frequency
            } else {
                0i128
            }
        });
    }

    #[test]
    #[ignore]
    fn test_offset_identification_core_1() {
        fn rdtsc_with_core_1_offset_by_100_000() -> u64 {
            let mut id = 0u32;
            // SAFETY: trivially safe
            let mut value = unsafe { __rdtscp(&mut id as *mut u32) };
            if id == 1 {
                value += 100_000;
            }

            value
        }

        // This test only works if the host has >=2 logical cores.
        let num_cores =
            base::number_of_logical_cores().expect("Failed to get number of logical cores");
        if num_cores < 2 {
            return;
        }

        let state = calibrate_tsc_state_inner(
            rdtsc_with_core_1_offset_by_100_000,
            (0..num_cores).collect(),
        )
        .unwrap();

        // Only core 1 carries the artificial offset.
        let frequency = state.frequency as i128;
        assert_offsets_match(&state, |core| {
            if core == 1 {
                100_000i128 * 1_000_000_000i128 / frequency
            } else {
                0i128
            }
        });
    }
}
555}