catapult_converter/
main.rs

1// Copyright 2022 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::collections::HashMap;
6use std::collections::HashSet;
7use std::fs;
8use std::hash::Hash;
9use std::hash::Hasher;
10
11use argh::FromArgs;
12use serde::Deserialize;
13use serde::Serialize;
14use serde_json::json;
15use serde_json::to_string_pretty;
16use serde_json::Number;
17use serde_json::Value;
18use uuid::Uuid;
19
// Command-line arguments, parsed in `main` via `argh::from_env()`.
//
// NOTE: the `///` comments on this struct and its fields are consumed by argh's `FromArgs`
// derive as the `--help` text, so editing them changes user-visible output. Maintainer notes
// therefore use plain `//` comments.
//
// Fields typed `Option<_>` are optional on the command line; the others are required options.
/// This tool takes results from Fuchsia performance tests (in Fuchsia's JSON perf test results
/// format) and converts them to the Catapult Dashboard's JSON HistogramSet format.
///
/// See <https://cs.opensource.google/fuchsia/fuchsia/+/main:src/testing/catapult_converter/README.md>
/// for details on arguments that are copied into output
#[derive(FromArgs)]
struct ConverterArgs {
    // Path read in full by `main` and parsed as a JSON array of `FuchsiaPerf` entries.
    /// input file: perf test results JSON file
    #[argh(option, arg_name = "FILENAME")]
    input: String,

    // When absent, `main` prints the serialized result to stdout instead of writing a file.
    /// output file: Catapult HistogramSet JSON file (default is stdout)
    #[argh(option, arg_name = "FILENAME")]
    output: Option<String>,

    // Emitted as the "a_productVersions" shared diagnostic, only when provided.
    /// release version in the format 0.yyyymmdd.a.b if applicable. e.g. 0.20200101.1.2
    #[argh(option, arg_name = "STRING")]
    product_versions: Option<String>,

    // Emitted as the "pointId" shared diagnostic.
    /// copied into output file as pointId, used to order results from different builds in a graph
    #[argh(option, arg_name = "NUMBER")]
    execution_timestamp_ms: i64,

    // Emitted as the "masters" shared diagnostic.
    /// copied into output file
    #[argh(option, arg_name = "STRING")]
    masters: String,

    // Emitted as the "bots" shared diagnostic.
    /// copied into output file
    #[argh(option, arg_name = "STRING")]
    bots: String,

    // Emitted as the "logUrls" shared diagnostic, paired with the label "Build Log".
    /// copied into output file
    #[argh(option, arg_name = "URL")]
    log_url: String,
}
55
/// Measurement units accepted in the `unit` field of an input perf result.
///
/// Each variant deserializes from its Rust variant name as well as from the serde aliases
/// listed on it. `Watts` has no alias, so only the literal string "Watts" selects it.
///
/// `convert_unit` maps each variant onto a `HistogramUnit`.
#[derive(Deserialize, Debug)]
enum FuchsiaPerfUnit {
    #[serde(alias = "nanoseconds", alias = "ns")]
    NanoSeconds,
    #[serde(alias = "milliseconds", alias = "ms")]
    Milliseconds,
    #[serde(alias = "bytes/second")]
    BytesPerSecond,
    #[serde(alias = "bits/second")]
    BitsPerSecond,
    #[serde(alias = "bytes")]
    Bytes,
    #[serde(alias = "frames/second")]
    FramesPerSecond,
    #[serde(alias = "percent")]
    Percent,
    #[serde(alias = "count")]
    Count,
    Watts,
}
76
/// Unit written into the `unit` field of an output histogram.
///
/// Each variant serializes to the string given in its `rename` attribute. Most of those
/// strings carry an improvement-direction suffix (`_smallerIsBetter` / `_biggerIsBetter`);
/// `Count` serializes as plain "count" with no suffix.
#[derive(Serialize, Debug)]
enum HistogramUnit {
    #[serde(rename = "ms_smallerIsBetter")]
    Milliseconds,
    // Used by `convert_unit` for the throughput units (bytes/second and bits/second).
    #[serde(rename = "unitless_biggerIsBetter")]
    UnitlessBiggerIsBetter,
    #[serde(rename = "sizeInBytes_smallerIsBetter")]
    Bytes,
    #[serde(rename = "Hz_biggerIsBetter")]
    FramesPerSecond,
    #[serde(rename = "n%_smallerIsBetter")]
    Percent,
    #[serde(rename = "count")]
    Count,
    #[serde(rename = "W_smallerIsBetter")]
    Watts,
}
94
/// One entry of the input perf results file; `main` parses the file as a JSON array of these.
#[derive(Deserialize, Debug)]
struct FuchsiaPerf {
    /// Test name; the input key may be spelled either `test_name` or `label`.
    #[serde(alias = "label")]
    test_name: String,
    /// Optional metric name. `main` appends it to the histogram name as `/<metric>` unless it
    /// is "real_time".
    metric: Option<String>,
    /// Suite the test belongs to; `main` emits one "benchmarks" diagnostic per distinct suite.
    test_suite: String,
    /// Unit that `values` are expressed in.
    unit: FuchsiaPerfUnit,
    /// Raw samples from which `Histogram::new` computes the summary statistics.
    values: Vec<f64>,
}
104
105fn convert_unit(input_unit: FuchsiaPerfUnit, values: &mut [f64]) -> HistogramUnit {
106    match input_unit {
107        FuchsiaPerfUnit::NanoSeconds => {
108            for value in values.iter_mut() {
109                *value /= 1e6;
110            }
111            HistogramUnit::Milliseconds
112        }
113        FuchsiaPerfUnit::Milliseconds => HistogramUnit::Milliseconds,
114        // The Catapult dashboard does not yet support a "bytes per unit time"
115        // unit (of any multiple), and it rejects unknown units, so we report
116        // this as "unitless" here for now.
117        FuchsiaPerfUnit::BytesPerSecond => HistogramUnit::UnitlessBiggerIsBetter,
118        FuchsiaPerfUnit::BitsPerSecond => {
119            // convert to bytes/s to be consistent with bytes/second
120            for value in values.iter_mut() {
121                *value /= 8.0;
122            }
123            HistogramUnit::UnitlessBiggerIsBetter
124        }
125        FuchsiaPerfUnit::Bytes => HistogramUnit::Bytes,
126        FuchsiaPerfUnit::FramesPerSecond => HistogramUnit::FramesPerSecond,
127        FuchsiaPerfUnit::Percent => HistogramUnit::Percent,
128        FuchsiaPerfUnit::Count => HistogramUnit::Count,
129        FuchsiaPerfUnit::Watts => HistogramUnit::Watts,
130    }
131}
132
/// A diagnostic entry of the output HistogramSet; histograms refer to it through its `guid`.
///
/// `Hash` and `PartialEq` are implemented by hand on `guid` alone, so a
/// `HashSet<Diagnostic>` holds at most one entry per guid regardless of `values`.
#[derive(Serialize, Clone, Debug, Eq)]
struct Diagnostic {
    /// Identifier that histograms store in their `diagnostics` map.
    guid: String,
    /// Serialized under the key "type"; `Default` sets it to "GenericSet".
    // NOTE(review): `default = ...` is a deserialization attribute and this type only derives
    // `Serialize`, so it looks like it has no effect here — confirm before relying on it.
    #[serde(rename = "type", default = "GenericSet")]
    diag_type: String,
    /// JSON payload of the diagnostic.
    values: Vec<Value>,
}
140
141impl Hash for Diagnostic {
142    fn hash<H: Hasher>(&self, state: &mut H) {
143        self.guid.hash(state);
144    }
145}
146
147impl PartialEq<Self> for Diagnostic {
148    fn eq(&self, other: &Self) -> bool {
149        self.guid == other.guid
150    }
151}
152
153impl Default for Diagnostic {
154    fn default() -> Self {
155        Diagnostic {
156            guid: Uuid::new_v4().to_string(),
157            diag_type: "GenericSet".to_string(),
158            values: Vec::default(),
159        }
160    }
161}
162
/// One histogram entry of the output HistogramSet, summarising the samples of a single test.
///
/// Built by `Histogram::new`.
#[derive(Serialize, Debug)]
struct Histogram {
    /// Histogram name as passed to `Histogram::new`.
    name: String,
    /// Unit of the statistics in `running`, after any rescaling done by `convert_unit`.
    unit: HistogramUnit,
    /// Always the empty string as constructed by `Histogram::new`.
    // NOTE(review): `#[serde(default)]` only affects deserialization and this type only
    // derives `Serialize`, so it appears to be inert — confirm.
    #[serde(default)]
    description: String,
    /// Maps a diagnostic name (e.g. "pointId", "benchmarks") to the guid of the `Diagnostic`
    /// entry that carries its value.
    diagnostics: HashMap<&'static str, String>,
    /// Summary statistics in the order filled in by `Histogram::new`:
    /// count, max, meanlogs, mean, min, sum, variance.
    // serde_json converts NaN / infinite to null by default
    running: Vec<Option<Number>>,
    /// Freshly generated identifier of this histogram.
    guid: String,
    /// Number of input samples.
    #[serde(rename = "maxNumSampleValues")]
    max_num_sample_values: u64,
    /// Always 0 as constructed by `Histogram::new`.
    #[serde(rename = "numNans", default)]
    num_nans: u64,
}
178
179impl Histogram {
180    fn new(
181        test_name: &str,
182        unit: FuchsiaPerfUnit,
183        diagnostics: HashMap<&'static str, String>,
184        original_values: Vec<f64>,
185    ) -> Self {
186        let mut values = original_values;
187        let output_unit = convert_unit(unit, &mut values);
188
189        let mut stats: Vec<Option<Number>> = Vec::new();
190        let mean: f64 = values.iter().sum::<f64>() / values.len() as f64;
191
192        // count
193        stats.push(Some(values.len().into()));
194
195        // max
196        stats.push(Number::from_f64(
197            values.iter().cloned().max_by(f64::total_cmp).unwrap(),
198        ));
199
200        // meanlogs
201        stats.push(Number::from_f64(
202            values.iter().map(|x| f64::ln(*x)).sum::<f64>() / values.len() as f64,
203        ));
204
205        // mean
206        stats.push(Number::from_f64(mean));
207
208        // min
209        stats.push(Number::from_f64(
210            values.iter().cloned().min_by(f64::total_cmp).unwrap(),
211        ));
212
213        // sum
214        stats.push(Number::from_f64(values.iter().sum()));
215
216        // variance
217        // Bessel's correction applied. Bessel's correction gives us a better estimation of
218        // the population's variance given a sample of the population.
219        stats.push(Number::from_f64(if values.len() <= 1 {
220            0.0
221        } else {
222            values
223                .iter()
224                .map(|x| (*x - mean) * (*x - mean))
225                .sum::<f64>()
226                / (values.len() - 1) as f64
227        }));
228
229        Histogram {
230            name: test_name.to_string(),
231            unit: output_unit,
232            description: "".to_string(),
233            diagnostics,
234            running: stats,
235            guid: Uuid::new_v4().to_string(),
236            max_num_sample_values: values.len() as u64,
237            // Assume for now that we didn't get any NaN values.
238            num_nans: 0,
239        }
240    }
241}
242
243fn build_shared_diagnostic_map(
244    args: &ConverterArgs,
245) -> (HashMap<&'static str, String>, HashSet<Diagnostic>) {
246    let mut diag_map = HashMap::new();
247    let mut diag_set = HashSet::new();
248
249    let diag = Diagnostic {
250        values: vec![json!(args.execution_timestamp_ms)],
251        ..Default::default()
252    };
253    diag_set.insert(diag.clone());
254    diag_map.insert("pointId", diag.guid);
255
256    let diag = Diagnostic {
257        values: vec![json!(args.bots)],
258        ..Default::default()
259    };
260    diag_set.insert(diag.clone());
261    diag_map.insert("bots", diag.guid);
262
263    let diag = Diagnostic {
264        values: vec![json!(args.masters)],
265        ..Default::default()
266    };
267    diag_set.insert(diag.clone());
268    diag_map.insert("masters", diag.guid);
269
270    if let Some(version) = &args.product_versions {
271        let diag = Diagnostic {
272            values: vec![json!(version)],
273            ..Default::default()
274        };
275        diag_set.insert(diag.clone());
276        diag_map.insert("a_productVersions", diag.guid);
277    }
278    let diag = Diagnostic {
279        values: vec![json!(vec!("Build Log".to_string(), args.log_url.clone()))],
280        ..Default::default()
281    };
282    diag_set.insert(diag.clone());
283    diag_map.insert("logUrls", diag.guid);
284    (diag_map, diag_set)
285}
286
/// One element of the output array, which mixes diagnostics and histograms.
///
/// Because the enum is `untagged`, each element serializes as the inner struct itself, with
/// no wrapper naming the variant.
#[derive(Serialize, Debug)]
#[serde(untagged)]
enum HistogramSetElement {
    Diagnostic(Diagnostic),
    Histogram(Histogram),
}
293
294fn main() {
295    let args: ConverterArgs = argh::from_env();
296    let content = fs::read_to_string(&args.input)
297        .expect("Failed to read the file, have you specified the correct path?");
298
299    let perf_data: Vec<FuchsiaPerf> =
300        serde_json::from_str(&content).expect("Failed to parse input data file");
301
302    let (shared_diag_map, mut diag_set) = build_shared_diagnostic_map(&args);
303
304    let mut test_suite_guid_map = HashMap::new();
305
306    for test_result in &perf_data {
307        if !test_suite_guid_map.contains_key(&test_result.test_suite) {
308            let new_uuid = Uuid::new_v4().to_string();
309            test_suite_guid_map.insert(test_result.test_suite.clone(), new_uuid.to_owned());
310            diag_set.insert(Diagnostic {
311                values: vec![json!(test_result.test_suite)],
312                guid: new_uuid,
313                ..Default::default()
314            });
315        }
316    }
317
318    let mut output = Vec::<HistogramSetElement>::new();
319    output.extend(
320        diag_set
321            .iter()
322            .cloned()
323            .map(HistogramSetElement::Diagnostic),
324    );
325
326    for test_result in perf_data {
327        let mut diag_map = shared_diag_map.clone();
328        diag_map.insert(
329            "benchmarks",
330            test_suite_guid_map[&test_result.test_suite].clone(),
331        );
332
333        let mut name = test_result.test_name.clone();
334        if let Some(metric) = &test_result.metric {
335            if metric != "real_time" {
336                name += "/";
337                name += metric.as_str();
338            }
339        }
340
341        output.push(HistogramSetElement::Histogram(Histogram::new(
342            name.replace(' ', "_").as_str(),
343            test_result.unit,
344            diag_map,
345            test_result.values,
346        )));
347    }
348
349    let serialized_output = to_string_pretty(&output).expect("Unable to serialize result");
350
351    match &args.output {
352        Some(file_name) => fs::write(file_name, serialized_output).unwrap(),
353        None => println!("{serialized_output}"),
354    }
355}