1use std::fs::File;
6use std::path::PathBuf;
7use std::sync::atomic::AtomicU32;
8use std::sync::atomic::Ordering;
9use std::sync::Arc;
10use std::time::Duration;
11
12use anyhow::Context;
13use base::sched_attr;
14use base::sched_setattr;
15use base::set_cpu_affinity;
16use base::warn;
17use base::Error;
18use base::Event;
19use base::EventToken;
20use base::Timer;
21use base::TimerTrait;
22use base::Tube;
23use base::WaitContext;
24use base::WorkerThread;
25use sync::Mutex;
26
27use crate::pci::CrosvmDeviceId;
28use crate::BusAccessInfo;
29use crate::BusDevice;
30use crate::DeviceId;
31use crate::Suspendable;
32
// Percentage applied to the guest-requested utilization before it is handed to
// the host scheduler; selected from the host CPU's cpufreq governor.
const CPUFREQ_GOV_SCALE_FACTOR_DEFAULT: u32 = 100;
const CPUFREQ_GOV_SCALE_FACTOR_SCHEDUTIL: u32 = 80;

// Flag bits placed in `sched_attr::sched_flags` when calling `sched_setattr`.
const SCHED_FLAG_RESET_ON_FORK: u64 = 1 << 0;
const SCHED_FLAG_KEEP_POLICY: u64 = 1 << 3;
const SCHED_FLAG_KEEP_PARAMS: u64 = 1 << 4;
const SCHED_FLAG_UTIL_CLAMP_MIN: u64 = 1 << 5;
const SCHED_FLAG_UTIL_CLAMP_MAX: u64 = 1 << 6;

// Register offsets of the device, matched against the bus access offset.
const VCPUFREQ_CUR_PERF: u32 = 0x00;
const VCPUFREQ_SET_PERF: u32 = 0x04;
const VCPUFREQ_FREQTBL_LEN: u32 = 0x08;
const VCPUFREQ_FREQTBL_SEL: u32 = 0x0c;
const VCPUFREQ_FREQTBL_RD: u32 = 0x10;
const VCPUFREQ_PERF_DOMAIN: u32 = 0x14;

// Keep both the current scheduling policy and its parameters.
const SCHED_FLAG_KEEP_ALL: u64 = SCHED_FLAG_KEEP_PARAMS | SCHED_FLAG_KEEP_POLICY;
// Upper bound used for the `sched_util_min` / `sched_util_max` clamp values.
const SCHED_CAPACITY_SCALE: u32 = 1 << 10;

// Duration (in microseconds) carried by each throttle request.
const MIN_TIMER_US: u32 = 75;
// Extra microseconds added to `MIN_TIMER_US` when computing the timer period.
const TIMER_OVERHEAD_US: u32 = 15;
55
/// Virtual cpufreq bus device: exposes a small register window through which
/// a guest reads its current performance level and frequency table and
/// requests a new performance level.
pub struct VirtCpufreqV2 {
    /// Frequency table; its length and entries are returned by the
    /// `VCPUFREQ_FREQTBL_LEN` / `VCPUFREQ_FREQTBL_RD` registers.
    vcpu_freq_table: Vec<u32>,
    /// Value of `cpufreq/cpuinfo_max_freq` read from sysfs for `pcpu`.
    pcpu_fmax: u32,
    /// Value of `cpu_capacity` read from sysfs for `pcpu`.
    pcpu_capacity: u32,
    /// Host CPU index whose sysfs attributes are queried.
    pcpu: u32,
    /// Percentage applied to the requested utilization (80 when the host
    /// governor is `schedutil`, 100 otherwise).
    util_factor: u32,
    /// Index into `vcpu_freq_table` selected by a write to
    /// `VCPUFREQ_FREQTBL_SEL`; starts at 0.
    freqtbl_sel: u32,
    /// Value returned by the `VCPUFREQ_PERF_DOMAIN` register; also used when
    /// computing the worker thread's CPU affinity.
    vcpu_domain: u32,
    /// `cpu.uclamp.min` file under the domain path, when a path was supplied.
    domain_uclamp_min: Option<File>,
    /// `cpu.uclamp.max` file under the domain path, when a path was supplied.
    domain_uclamp_max: Option<File>,
    /// Largest entry of `vcpu_freq_table`.
    vcpu_fmax: u32,
    /// Capacity value supplied by the caller of `new`.
    vcpu_capacity: u32,
    /// `vcpu_capacity * pcpu_fmax / vcpu_fmax`, computed once in `new`.
    vcpu_relative_capacity: u32,
    /// Throttling worker; started lazily on a `VCPUFREQ_SET_PERF` write when
    /// `vcpu_fmax` differs from `pcpu_fmax`.
    worker: Option<WorkerThread<()>>,
    /// Timer shared with the worker; armed or cleared on `VCPUFREQ_SET_PERF`.
    timer: Arc<Mutex<Timer>>,
    /// Control tube the worker uses to send throttle requests.
    vm_ctrl: Arc<Mutex<Tube>>,
    /// `cpuinfo_min_freq * pcpu_capacity / pcpu_fmax`, computed once in `new`.
    pcpu_min_cap: u32,
    /// Added to `vcpu_domain + 1` to pick the CPU the worker is pinned to.
    largest_pcpu_idx: usize,
    /// vCPU ids that receive a throttle request each time the timer expires.
    shared_domain_vcpus: Vec<usize>,
    /// Unscaled utilization stored by the last `VCPUFREQ_SET_PERF` write that
    /// reached the throttling path.
    shared_domain_perf: Arc<AtomicU32>,
}
85
86fn get_cpu_info(cpu_id: u32, property: &str) -> Result<u32, Error> {
87 let path = format!("/sys/devices/system/cpu/cpu{cpu_id}/{property}");
88 std::fs::read_to_string(path)?
89 .trim()
90 .parse()
91 .map_err(|_| Error::new(libc::EINVAL))
92}
93
94fn get_cpu_info_str(cpu_id: u32, property: &str) -> Result<String, Error> {
95 let path = format!("/sys/devices/system/cpu/cpu{cpu_id}/{property}");
96 std::fs::read_to_string(path).map_err(|_| Error::new(libc::EINVAL))
97}
98
99fn get_cpu_capacity(cpu_id: u32) -> Result<u32, Error> {
100 get_cpu_info(cpu_id, "cpu_capacity")
101}
102
103fn get_cpu_maxfreq_khz(cpu_id: u32) -> Result<u32, Error> {
104 get_cpu_info(cpu_id, "cpufreq/cpuinfo_max_freq")
105}
106
107fn get_cpu_minfreq_khz(cpu_id: u32) -> Result<u32, Error> {
108 get_cpu_info(cpu_id, "cpufreq/cpuinfo_min_freq")
109}
110
111fn get_cpu_curfreq_khz(cpu_id: u32) -> Result<u32, Error> {
112 get_cpu_info(cpu_id, "cpufreq/scaling_cur_freq")
113}
114
115fn get_cpu_util_factor(cpu_id: u32) -> Result<u32, Error> {
116 let gov = get_cpu_info_str(cpu_id, "cpufreq/scaling_governor")?;
117 match gov.trim() {
118 "schedutil" => Ok(CPUFREQ_GOV_SCALE_FACTOR_SCHEDUTIL),
119 _ => Ok(CPUFREQ_GOV_SCALE_FACTOR_DEFAULT),
120 }
121}
122
123impl VirtCpufreqV2 {
124 pub fn new(
125 pcpu: u32,
126 vcpu_freq_table: Vec<u32>,
127 vcpu_domain_path: Option<PathBuf>,
128 vcpu_domain: u32,
129 vcpu_capacity: u32,
130 largest_pcpu_idx: usize,
131 vm_ctrl: Arc<Mutex<Tube>>,
132 shared_domain_vcpus: Vec<usize>,
133 shared_domain_perf: Arc<AtomicU32>,
134 ) -> Self {
135 let pcpu_capacity = get_cpu_capacity(pcpu).expect("Error reading capacity");
136 let pcpu_fmax = get_cpu_maxfreq_khz(pcpu).expect("Error reading max freq");
137 let util_factor = get_cpu_util_factor(pcpu).expect("Error getting util factor");
138 let freqtbl_sel = 0;
139 let mut domain_uclamp_min = None;
140 let mut domain_uclamp_max = None;
141 let vcpu_fmax = vcpu_freq_table.clone().into_iter().max().unwrap();
146 let vcpu_relative_capacity =
147 u32::try_from(u64::from(vcpu_capacity) * u64::from(pcpu_fmax) / u64::from(vcpu_fmax))
148 .unwrap();
149 let pcpu_min_cap =
150 get_cpu_minfreq_khz(pcpu).expect("Error reading min freq") * pcpu_capacity / pcpu_fmax;
151
152 if let Some(cgroup_path) = &vcpu_domain_path {
153 domain_uclamp_min = Some(
154 File::create(cgroup_path.join("cpu.uclamp.min")).unwrap_or_else(|err| {
155 panic!(
156 "Err: {}, Unable to open: {}",
157 err,
158 cgroup_path.join("cpu.uclamp.min").display()
159 )
160 }),
161 );
162 domain_uclamp_max = Some(
163 File::create(cgroup_path.join("cpu.uclamp.max")).unwrap_or_else(|err| {
164 panic!(
165 "Err: {}, Unable to open: {}",
166 err,
167 cgroup_path.join("cpu.uclamp.max").display()
168 )
169 }),
170 );
171 }
172
173 VirtCpufreqV2 {
174 vcpu_freq_table,
175 pcpu_fmax,
176 pcpu_capacity,
177 pcpu,
178 util_factor,
179 freqtbl_sel,
180 vcpu_domain,
181 domain_uclamp_min,
182 domain_uclamp_max,
183 vcpu_fmax,
184 vcpu_capacity,
185 vcpu_relative_capacity,
186 worker: None,
187 timer: Arc::new(Mutex::new(Timer::new().expect("failed to create Timer"))),
188 vm_ctrl,
189 pcpu_min_cap,
190 largest_pcpu_idx,
191 shared_domain_vcpus,
192 shared_domain_perf,
193 }
194 }
195}
196
197impl BusDevice for VirtCpufreqV2 {
198 fn device_id(&self) -> DeviceId {
199 CrosvmDeviceId::VirtCpufreq.into()
200 }
201
202 fn debug_label(&self) -> String {
203 "VirtCpufreq Device".to_owned()
204 }
205
206 fn read(&mut self, info: BusAccessInfo, data: &mut [u8]) {
207 if data.len() != std::mem::size_of::<u32>() {
208 warn!(
209 "{}: unsupported read length {}, only support 4bytes read",
210 self.debug_label(),
211 data.len()
212 );
213 return;
214 }
215
216 let val = match info.offset as u32 {
217 VCPUFREQ_CUR_PERF => {
218 let shared_util = self.shared_domain_perf.load(Ordering::SeqCst);
219 if shared_util != 0 && shared_util < self.pcpu_min_cap {
220 shared_util * self.vcpu_fmax / self.vcpu_capacity
221 } else {
222 match get_cpu_curfreq_khz(self.pcpu) {
223 Ok(freq) => u32::try_from(
224 u64::from(freq) * u64::from(self.pcpu_capacity)
225 / u64::from(self.vcpu_relative_capacity),
226 )
227 .unwrap(),
228 Err(_) => 0,
229 }
230 }
231 }
232 VCPUFREQ_FREQTBL_LEN => self.vcpu_freq_table.len() as u32,
233 VCPUFREQ_PERF_DOMAIN => self.vcpu_domain,
234 VCPUFREQ_FREQTBL_RD => *self
235 .vcpu_freq_table
236 .get(self.freqtbl_sel as usize)
237 .unwrap_or(&0),
238 _ => {
239 warn!("{}: unsupported read address {}", self.debug_label(), info);
240 return;
241 }
242 };
243
244 let val_arr = val.to_ne_bytes();
245 data.copy_from_slice(&val_arr);
246 }
247
248 fn write(&mut self, info: BusAccessInfo, data: &[u8]) {
249 let val: u32 = match data.try_into().map(u32::from_ne_bytes) {
250 Ok(v) => v,
251 Err(e) => {
252 warn!(
253 "{}: unsupported write length {:#}, only support 4bytes write",
254 self.debug_label(),
255 e
256 );
257 return;
258 }
259 };
260
261 match info.offset as u32 {
262 VCPUFREQ_SET_PERF => {
263 let util_raw = match u32::try_from(
265 u64::from(self.vcpu_capacity) * u64::from(val) / u64::from(self.vcpu_fmax),
266 ) {
267 Ok(util) => util,
268 Err(e) => {
269 warn!("Potential overflow {:#}", e);
270 SCHED_CAPACITY_SCALE
271 }
272 };
273
274 let util = util_raw * self.util_factor / CPUFREQ_GOV_SCALE_FACTOR_DEFAULT;
275
276 if let (Some(domain_uclamp_min), Some(domain_uclamp_max)) =
277 (&mut self.domain_uclamp_min, &mut self.domain_uclamp_max)
278 {
279 use std::io::Write;
280 let val = util as f32 * 100.0 / SCHED_CAPACITY_SCALE as f32;
281 let val_formatted = format!("{val:4}").into_bytes();
282
283 if self.vcpu_fmax != self.pcpu_fmax {
284 if let Err(e) = domain_uclamp_max.write(&val_formatted) {
285 warn!("Error setting uclamp_max: {:#}", e);
286 }
287 }
288 if let Err(e) = domain_uclamp_min.write(&val_formatted) {
289 warn!("Error setting uclamp_min: {:#}", e);
290 }
291 } else {
292 let mut sched_attr = sched_attr {
293 sched_flags: SCHED_FLAG_KEEP_ALL
294 | SCHED_FLAG_UTIL_CLAMP_MIN
295 | SCHED_FLAG_UTIL_CLAMP_MAX
296 | SCHED_FLAG_RESET_ON_FORK,
297 sched_util_min: util,
298 ..Default::default()
299 };
300
301 if self.vcpu_fmax != self.pcpu_fmax {
302 sched_attr.sched_util_max = util;
303 } else {
304 sched_attr.sched_util_max = SCHED_CAPACITY_SCALE;
305 }
306
307 if let Err(e) = sched_setattr(0, &mut sched_attr, 0) {
308 panic!("{}: Error setting util value: {:#}", self.debug_label(), e);
309 }
310 }
311
312 if self.vcpu_fmax == self.pcpu_fmax {
315 return;
316 }
317
318 self.shared_domain_perf.store(util_raw, Ordering::SeqCst);
319 let timer = self.timer.clone();
320 if self.worker.is_none() {
321 let vcpu_id = info.id;
322 let vm_ctrl = self.vm_ctrl.clone();
323 let worker_cpu_affinity = self.largest_pcpu_idx + self.vcpu_domain as usize + 1;
324 let shared_domain_vcpus = self.shared_domain_vcpus.clone();
325
326 self.worker = Some(WorkerThread::start(
327 format!("vcpu_throttle{vcpu_id}"),
328 move |kill_evt| {
329 vcpufreq_worker_thread(
330 shared_domain_vcpus,
331 kill_evt,
332 timer,
333 vm_ctrl,
334 worker_cpu_affinity,
335 )
336 .expect("error running vpucfreq_worker")
337 },
338 ));
339 } else if util_raw < self.pcpu_min_cap {
340 let timeout_period = (MIN_TIMER_US + TIMER_OVERHEAD_US) as f32
350 / (1.0 - (util_raw as f32 / self.pcpu_min_cap as f32));
351 let _ = timer
352 .lock()
353 .reset_repeating(Duration::from_micros(timeout_period as u64));
354 } else {
355 let _ = timer.lock().clear();
356 }
357 }
358 VCPUFREQ_FREQTBL_SEL => self.freqtbl_sel = val,
359 _ => {
360 warn!("{}: unsupported read address {}", self.debug_label(), info);
361 }
362 }
363 }
364}
365
366pub fn vcpufreq_worker_thread(
367 shared_domain_vcpus: Vec<usize>,
368 kill_evt: Event,
369 timer: Arc<Mutex<Timer>>,
370 vm_ctrl: Arc<Mutex<Tube>>,
371 cpu_affinity: usize,
372) -> anyhow::Result<()> {
373 #[derive(EventToken)]
374 enum Token {
375 TimerExpire,
377 Kill,
379 }
380
381 let wait_ctx = WaitContext::build_with(&[
382 (&*timer.lock(), Token::TimerExpire),
383 (&kill_evt, Token::Kill),
384 ])
385 .context("Failed to create wait_ctx")?;
386
387 let cpu_set: Vec<usize> = vec![cpu_affinity];
390 set_cpu_affinity(cpu_set)?;
391
392 let mut sched_attr = sched_attr {
393 sched_flags: SCHED_FLAG_KEEP_ALL
394 | SCHED_FLAG_UTIL_CLAMP_MIN
395 | SCHED_FLAG_UTIL_CLAMP_MAX
396 | SCHED_FLAG_RESET_ON_FORK,
397 sched_util_min: SCHED_CAPACITY_SCALE,
398 sched_util_max: SCHED_CAPACITY_SCALE,
399 ..Default::default()
400 };
401 if let Err(e) = sched_setattr(0, &mut sched_attr, 0) {
402 warn!("Error setting util value: {}", e);
403 }
404
405 loop {
406 let events = wait_ctx.wait().context("Failed to wait for events")?;
407 for event in events.iter().filter(|e| e.is_readable) {
408 match event.token {
409 Token::TimerExpire => {
410 timer
411 .lock()
412 .mark_waited()
413 .context("failed to reset timer")?;
414 let vm_ctrl_unlocked = vm_ctrl.lock();
415 for vcpu_id in &shared_domain_vcpus {
416 let msg = vm_control::VmRequest::Throttle(*vcpu_id, MIN_TIMER_US);
417 vm_ctrl_unlocked
418 .send(&msg)
419 .context("failed to stall vCPUs")?;
420 }
421 }
422 Token::Kill => {
423 return Ok(());
424 }
425 }
426 }
427 }
428}
429
// No methods are overridden here, so the device relies entirely on whatever
// default implementations the `Suspendable` trait provides.
impl Suspendable for VirtCpufreqV2 {}