base/sys/linux/
mod.rs

1// Copyright 2017 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Small system utility modules for usage by other modules.
6
7#[cfg(target_os = "android")]
8mod android;
9#[cfg(target_os = "android")]
10use android as target_os;
11#[cfg(target_os = "linux")]
12#[allow(clippy::module_inception)]
13mod linux;
14#[cfg(target_os = "linux")]
15use linux as target_os;
16use log::warn;
17#[macro_use]
18pub mod ioctl;
19#[macro_use]
20pub mod syslog;
21mod acpi_event;
22mod capabilities;
23mod descriptor;
24mod event;
25mod file;
26mod file_traits;
27mod mmap;
28mod net;
29mod netlink;
30mod notifiers;
31pub mod platform_timer_resolution;
32mod poll;
33mod priority;
34mod sched;
35mod shm;
36pub mod signal;
37mod signalfd;
38mod terminal;
39mod timer;
40pub mod vsock;
41mod write_zeroes;
42
43use std::ffi::CString;
44use std::fs::remove_file;
45use std::fs::File;
46use std::fs::OpenOptions;
47use std::mem;
48use std::mem::MaybeUninit;
49use std::ops::Deref;
50use std::os::unix::io::FromRawFd;
51use std::os::unix::io::RawFd;
52use std::os::unix::net::UnixDatagram;
53use std::os::unix::net::UnixListener;
54use std::os::unix::process::ExitStatusExt;
55use std::path::Path;
56use std::path::PathBuf;
57use std::process::ExitStatus;
58use std::ptr;
59use std::sync::OnceLock;
60use std::time::Duration;
61
62pub use acpi_event::*;
63pub use capabilities::drop_capabilities;
64pub use event::EventExt;
65pub(crate) use event::PlatformEvent;
66pub use file::find_next_data;
67pub use file::FileDataIterator;
68pub(crate) use file_traits::lib::*;
69pub use ioctl::*;
70use libc::c_int;
71use libc::c_long;
72use libc::fcntl;
73use libc::pipe2;
74use libc::prctl;
75use libc::syscall;
76use libc::waitpid;
77use libc::SYS_getpid;
78use libc::SYS_getppid;
79use libc::SYS_gettid;
80use libc::EINVAL;
81use libc::O_CLOEXEC;
82use libc::PR_SET_NAME;
83use libc::SIGKILL;
84use libc::WNOHANG;
85pub use mmap::*;
86pub(in crate::sys) use net::sendmsg_nosignal as sendmsg;
87pub(in crate::sys) use net::sockaddr_un;
88pub(in crate::sys) use net::sockaddrv4_to_lib_c;
89pub(in crate::sys) use net::sockaddrv6_to_lib_c;
90pub use netlink::*;
91pub use poll::EventContext;
92pub use priority::*;
93pub use sched::*;
94pub use shm::MemfdSeals;
95pub use shm::SharedMemoryLinux;
96pub use signal::*;
97pub use signalfd::Error as SignalFdError;
98pub use signalfd::*;
99pub use terminal::*;
100pub(crate) use write_zeroes::file_punch_hole;
101pub(crate) use write_zeroes::file_write_zeroes_at;
102
103use crate::descriptor::FromRawDescriptor;
104use crate::descriptor::SafeDescriptor;
105pub use crate::errno::Error;
106pub use crate::errno::Result;
107pub use crate::errno::*;
108use crate::number_of_logical_cores;
109use crate::round_up_to_page_size;
110pub use crate::sys::unix::descriptor::*;
111use crate::syscall;
112use crate::AsRawDescriptor;
113use crate::Pid;
114
// Re-export libc types that are part of the API.

/// User ID type; alias of `libc::uid_t`.
pub type Uid = libc::uid_t;
/// Group ID type; alias of `libc::gid_t`.
pub type Gid = libc::gid_t;
/// File mode type; alias of `libc::mode_t`.
pub type Mode = libc::mode_t;
119
120/// Safe wrapper for PR_SET_NAME(2const)
121#[inline(always)]
122pub fn set_thread_name(name: &str) -> Result<()> {
123    let name = CString::new(name).or(Err(Error::new(EINVAL)))?;
124    // SAFETY: prctl copies name and doesn't expect it to outlive this function.
125    let ret = unsafe { prctl(PR_SET_NAME, name.as_c_str()) };
126    if ret == 0 {
127        Ok(())
128    } else {
129        errno_result()
130    }
131}
132
133/// This bypasses `libc`'s caching `getpid(2)` wrapper which can be invalid if a raw clone was used
134/// elsewhere.
135#[inline(always)]
136pub fn getpid() -> Pid {
137    // SAFETY:
138    // Safe because this syscall can never fail and we give it a valid syscall number.
139    unsafe { syscall(SYS_getpid as c_long) as Pid }
140}
141
142/// Safe wrapper for the geppid Linux systemcall.
143#[inline(always)]
144pub fn getppid() -> Pid {
145    // SAFETY:
146    // Safe because this syscall can never fail and we give it a valid syscall number.
147    unsafe { syscall(SYS_getppid as c_long) as Pid }
148}
149
150/// Safe wrapper for the gettid Linux systemcall.
151pub fn gettid() -> Pid {
152    // SAFETY:
153    // Calling the gettid() sycall is always safe.
154    unsafe { syscall(SYS_gettid as c_long) as Pid }
155}
156
157/// Safe wrapper for `geteuid(2)`.
158#[inline(always)]
159pub fn geteuid() -> Uid {
160    // SAFETY:
161    // trivially safe
162    unsafe { libc::geteuid() }
163}
164
165/// Safe wrapper for `getegid(2)`.
166#[inline(always)]
167pub fn getegid() -> Gid {
168    // SAFETY:
169    // trivially safe
170    unsafe { libc::getegid() }
171}
172
/// The operation to perform with `flock`.
pub enum FlockOperation {
    /// Acquire a shared lock (`LOCK_SH`).
    LockShared,
    /// Acquire an exclusive lock (`LOCK_EX`).
    LockExclusive,
    /// Release a held lock (`LOCK_UN`).
    Unlock,
}
179
180/// Safe wrapper for flock(2) with the operation `op` and optionally `nonblocking`. The lock will be
181/// dropped automatically when `file` is dropped.
182#[inline(always)]
183pub fn flock<F: AsRawDescriptor>(file: &F, op: FlockOperation, nonblocking: bool) -> Result<()> {
184    let mut operation = match op {
185        FlockOperation::LockShared => libc::LOCK_SH,
186        FlockOperation::LockExclusive => libc::LOCK_EX,
187        FlockOperation::Unlock => libc::LOCK_UN,
188    };
189
190    if nonblocking {
191        operation |= libc::LOCK_NB;
192    }
193
194    // SAFETY:
195    // Safe since we pass in a valid fd and flock operation, and check the return value.
196    syscall!(unsafe { libc::flock(file.as_raw_descriptor(), operation) }).map(|_| ())
197}
198
/// The operation to perform with `fallocate`.
///
/// Every variant is converted to a flag set that includes `FALLOC_FL_KEEP_SIZE`.
pub enum FallocateMode {
    /// Converted to `FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE`.
    PunchHole,
    /// Converted to `FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE`.
    ZeroRange,
    /// Converted to `FALLOC_FL_KEEP_SIZE` alone.
    Allocate,
}
205
206impl From<FallocateMode> for i32 {
207    fn from(value: FallocateMode) -> Self {
208        match value {
209            FallocateMode::Allocate => libc::FALLOC_FL_KEEP_SIZE,
210            FallocateMode::PunchHole => libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
211            FallocateMode::ZeroRange => libc::FALLOC_FL_ZERO_RANGE | libc::FALLOC_FL_KEEP_SIZE,
212        }
213    }
214}
215
216impl From<FallocateMode> for u32 {
217    fn from(value: FallocateMode) -> Self {
218        Into::<i32>::into(value) as u32
219    }
220}
221
222/// Safe wrapper for `fallocate()`.
223pub fn fallocate<F: AsRawDescriptor>(
224    file: &F,
225    mode: FallocateMode,
226    offset: u64,
227    len: u64,
228) -> Result<()> {
229    let offset = if offset > libc::off64_t::MAX as u64 {
230        return Err(Error::new(libc::EINVAL));
231    } else {
232        offset as libc::off64_t
233    };
234
235    let len = if len > libc::off64_t::MAX as u64 {
236        return Err(Error::new(libc::EINVAL));
237    } else {
238        len as libc::off64_t
239    };
240
241    // SAFETY:
242    // Safe since we pass in a valid fd and fallocate mode, validate offset and len,
243    // and check the return value.
244    syscall!(unsafe { libc::fallocate64(file.as_raw_descriptor(), mode.into(), offset, len) })
245        .map(|_| ())
246}
247
248/// Safe wrapper for `fstat()`.
249pub fn fstat<F: AsRawDescriptor>(f: &F) -> Result<libc::stat64> {
250    let mut st = MaybeUninit::<libc::stat64>::zeroed();
251
252    // SAFETY:
253    // Safe because the kernel will only write data in `st` and we check the return
254    // value.
255    syscall!(unsafe { libc::fstat64(f.as_raw_descriptor(), st.as_mut_ptr()) })?;
256
257    // SAFETY:
258    // Safe because the kernel guarantees that the struct is now fully initialized.
259    Ok(unsafe { st.assume_init() })
260}
261
262/// Checks whether a file is a block device fie or not.
263pub fn is_block_file<F: AsRawDescriptor>(file: &F) -> Result<bool> {
264    let stat = fstat(file)?;
265    Ok((stat.st_mode & libc::S_IFMT) == libc::S_IFBLK)
266}
267
/// ioctl type value used for the block-device ioctl declared below.
const BLOCK_IO_TYPE: u32 = 0x12;
// Declares the `BLKDISCARD` ioctl request (type `BLOCK_IO_TYPE`, number 119).
ioctl_io_nr!(BLKDISCARD, BLOCK_IO_TYPE, 119);
270
271/// Discards the given range of a block file.
272pub fn discard_block<F: AsRawDescriptor>(file: &F, offset: u64, len: u64) -> Result<()> {
273    let range: [u64; 2] = [offset, len];
274    // SAFETY:
275    // Safe because
276    // - we check the return value.
277    // - ioctl(BLKDISCARD) does not hold the descriptor after the call.
278    // - ioctl(BLKDISCARD) does not break the file descriptor.
279    // - ioctl(BLKDISCARD) does not modify the given range.
280    syscall!(unsafe { libc::ioctl(file.as_raw_descriptor(), BLKDISCARD, &range) }).map(|_| ())
281}
282
/// A trait used to abstract types that provide a process id that can be operated on.
pub trait AsRawPid {
    /// Returns the raw process id represented by `self`.
    fn as_raw_pid(&self) -> Pid;
}
287
/// A `Pid` is already a raw process id, so it is returned unchanged.
impl AsRawPid for Pid {
    fn as_raw_pid(&self) -> Pid {
        *self
    }
}
293
/// Exposes the id of a spawned `std::process::Child` as a raw `Pid`.
impl AsRawPid for std::process::Child {
    fn as_raw_pid(&self) -> Pid {
        // `Child::id` returns a `u32`; convert it to the crate's `Pid` type.
        self.id() as Pid
    }
}
299
300/// A safe wrapper around waitpid.
301///
302/// On success if a process was reaped, it will be returned as the first value.
303/// The second returned value is the ExitStatus from the libc::waitpid() call.
304///
305/// Note: this can block if libc::WNOHANG is not set and EINTR is not handled internally.
306pub fn wait_for_pid<A: AsRawPid>(pid: A, options: c_int) -> Result<(Option<Pid>, ExitStatus)> {
307    let pid = pid.as_raw_pid();
308    let mut status: c_int = 1;
309    // SAFETY:
310    // Safe because status is owned and the error is checked.
311    let ret = unsafe { libc::waitpid(pid, &mut status, options) };
312    if ret < 0 {
313        return errno_result();
314    }
315    Ok((
316        if ret == 0 { None } else { Some(ret) },
317        ExitStatus::from_raw(status),
318    ))
319}
320
321/// Reaps a child process that has terminated.
322///
323/// Returns `Ok(pid)` where `pid` is the process that was reaped or `Ok(0)` if none of the children
324/// have terminated. An `Error` is with `errno == ECHILD` if there are no children left to reap.
325///
326/// # Examples
327///
328/// Reaps all child processes until there are no terminated children to reap.
329///
330/// ```
331/// fn reap_children() {
332///     loop {
333///         match base::linux::reap_child() {
334///             Ok(0) => println!("no children ready to reap"),
335///             Ok(pid) => {
336///                 println!("reaped {}", pid);
337///                 continue
338///             },
339///             Err(e) if e.errno() == libc::ECHILD => println!("no children left"),
340///             Err(e) => println!("error reaping children: {}", e),
341///         }
342///         break
343///     }
344/// }
345/// ```
346pub fn reap_child() -> Result<Pid> {
347    // SAFETY:
348    // Safe because we pass in no memory, prevent blocking with WNOHANG, and check for error.
349    let ret = unsafe { waitpid(-1, ptr::null_mut(), WNOHANG) };
350    if ret == -1 {
351        errno_result()
352    } else {
353        Ok(ret)
354    }
355}
356
357/// Kill all processes in the current process group.
358///
359/// On success, this kills all processes in the current process group, including the current
360/// process, meaning this will not return. This is equivalent to a call to `kill(0, SIGKILL)`.
361pub fn kill_process_group() -> Result<()> {
362    // SAFETY: Safe because pid is 'self group' and return value doesn't matter.
363    unsafe { kill(0, SIGKILL) }?;
364    // Kill succeeded, so this process never reaches here.
365    unreachable!();
366}
367
368/// Spawns a pipe pair where the first pipe is the read end and the second pipe is the write end.
369///
370/// The `O_CLOEXEC` flag will be set during pipe creation.
371pub fn pipe() -> Result<(File, File)> {
372    let mut pipe_fds = [-1; 2];
373    // SAFETY:
374    // Safe because pipe2 will only write 2 element array of i32 to the given pointer, and we check
375    // for error.
376    let ret = unsafe { pipe2(&mut pipe_fds[0], O_CLOEXEC) };
377    if ret == -1 {
378        errno_result()
379    } else {
380        // SAFETY:
381        // Safe because both fds must be valid for pipe2 to have returned sucessfully and we have
382        // exclusive ownership of them.
383        Ok(unsafe {
384            (
385                File::from_raw_fd(pipe_fds[0]),
386                File::from_raw_fd(pipe_fds[1]),
387            )
388        })
389    }
390}
391
392/// Sets the pipe signified with fd to `size`.
393///
394/// Returns the new size of the pipe or an error if the OS fails to set the pipe size.
395pub fn set_pipe_size(fd: RawFd, size: usize) -> Result<usize> {
396    // SAFETY:
397    // Safe because fcntl with the `F_SETPIPE_SZ` arg doesn't touch memory.
398    syscall!(unsafe { fcntl(fd, libc::F_SETPIPE_SZ, size as c_int) }).map(|ret| ret as usize)
399}
400
401/// Test-only function used to create a pipe that is full. The pipe is created, has its size set to
402/// the minimum and then has that much data written to it. Use `new_pipe_full` to test handling of
403/// blocking `write` calls in unit tests.
404pub fn new_pipe_full() -> Result<(File, File)> {
405    use std::io::Write;
406
407    let (rx, mut tx) = pipe()?;
408    // The smallest allowed size of a pipe is the system page size on linux.
409    let page_size = set_pipe_size(tx.as_raw_descriptor(), round_up_to_page_size(1))?;
410
411    // Fill the pipe with page_size zeros so the next write call will block.
412    let buf = vec![0u8; page_size];
413    tx.write_all(&buf)?;
414
415    Ok((rx, tx))
416}
417
/// Wraps a `UnixDatagram` and attempts to remove the socket's filesystem path once it is no
/// longer used (i.e. when the wrapper is dropped).
pub struct UnlinkUnixDatagram(pub UnixDatagram);
/// Gives borrowed access to the wrapped `UnixDatagram`.
impl AsRef<UnixDatagram> for UnlinkUnixDatagram {
    fn as_ref(&self) -> &UnixDatagram {
        &self.0
    }
}
425impl Drop for UnlinkUnixDatagram {
426    fn drop(&mut self) {
427        if let Ok(addr) = self.0.local_addr() {
428            if let Some(path) = addr.as_pathname() {
429                if let Err(e) = remove_file(path) {
430                    warn!("failed to remove control socket file: {}", e);
431                }
432            }
433        }
434    }
435}
436
/// Wraps a `UnixListener` and attempts to remove the socket's filesystem path once it is no
/// longer used (i.e. when the wrapper is dropped).
pub struct UnlinkUnixListener(pub UnixListener);
439
/// Gives borrowed access to the wrapped `UnixListener`.
impl AsRef<UnixListener> for UnlinkUnixListener {
    fn as_ref(&self) -> &UnixListener {
        &self.0
    }
}
445
/// Lets `UnixListener` methods be called directly on the wrapper.
impl Deref for UnlinkUnixListener {
    type Target = UnixListener;

    fn deref(&self) -> &UnixListener {
        &self.0
    }
}
453
454impl Drop for UnlinkUnixListener {
455    fn drop(&mut self) {
456        if let Ok(addr) = self.0.local_addr() {
457            if let Some(path) = addr.as_pathname() {
458                if let Err(e) = remove_file(path) {
459                    warn!("failed to remove control socket file: {}", e);
460                }
461            }
462        }
463    }
464}
465
/// Verifies that |raw_descriptor| is actually owned by this process and duplicates it
/// to ensure that we have a unique handle to it.
///
/// This simply forwards to `validate_raw_fd`; see that function for the exact checks and
/// error conditions.
pub fn validate_raw_descriptor(raw_descriptor: RawDescriptor) -> Result<RawDescriptor> {
    validate_raw_fd(&raw_descriptor)
}
471
472/// Verifies that |raw_fd| is actually owned by this process and duplicates it to ensure that
473/// we have a unique handle to it.
474pub fn validate_raw_fd(raw_fd: &RawFd) -> Result<RawFd> {
475    // Checking that close-on-exec isn't set helps filter out FDs that were opened by
476    // crosvm as all crosvm FDs are close on exec.
477    // SAFETY:
478    // Safe because this doesn't modify any memory and we check the return value.
479    let flags = unsafe { libc::fcntl(*raw_fd, libc::F_GETFD) };
480    if flags < 0 || (flags & libc::FD_CLOEXEC) != 0 {
481        return Err(Error::new(libc::EBADF));
482    }
483
484    // SAFETY:
485    // Duplicate the fd to ensure that we don't accidentally close an fd previously
486    // opened by another subsystem.  Safe because this doesn't modify any memory and
487    // we check the return value.
488    let dup_fd = unsafe { libc::fcntl(*raw_fd, libc::F_DUPFD_CLOEXEC, 0) };
489    if dup_fd < 0 {
490        return Err(Error::last());
491    }
492    Ok(dup_fd as RawFd)
493}
494
495/// Utility function that returns true if the given FD is readable without blocking.
496///
497/// On an error, such as an invalid or incompatible FD, this will return false, which can not be
498/// distinguished from a non-ready to read FD.
499pub fn poll_in<F: AsRawDescriptor>(fd: &F) -> bool {
500    let mut fds = libc::pollfd {
501        fd: fd.as_raw_descriptor(),
502        events: libc::POLLIN,
503        revents: 0,
504    };
505    // SAFETY:
506    // Safe because we give a valid pointer to a list (of 1) FD and check the return value.
507    let ret = unsafe { libc::poll(&mut fds, 1, 0) };
508    // An error probably indicates an invalid FD, or an FD that can't be polled. Returning false in
509    // that case is probably correct as such an FD is unlikely to be readable, although there are
510    // probably corner cases in which that is wrong.
511    if ret == -1 {
512        return false;
513    }
514    fds.revents & libc::POLLIN != 0
515}
516
517/// Return the maximum Duration that can be used with libc::timespec.
518pub fn max_timeout() -> Duration {
519    Duration::new(libc::time_t::MAX as u64, 999999999)
520}
521
522/// If the given path is of the form /proc/self/fd/N for some N, returns `Ok(Some(N))`. Otherwise
523/// returns `Ok(None)`.
524pub fn safe_descriptor_from_path<P: AsRef<Path>>(path: P) -> Result<Option<SafeDescriptor>> {
525    let path = path.as_ref();
526    if path.parent() == Some(Path::new("/proc/self/fd")) {
527        let raw_descriptor = path
528            .file_name()
529            .and_then(|fd_osstr| fd_osstr.to_str())
530            .and_then(|fd_str| fd_str.parse::<RawFd>().ok())
531            .ok_or_else(|| Error::new(EINVAL))?;
532        let validated_fd = validate_raw_fd(&raw_descriptor)?;
533        Ok(Some(
534            // SAFETY:
535            // Safe because nothing else has access to validated_fd after this call.
536            unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
537        ))
538    } else {
539        Ok(None)
540    }
541}
542
543/// Check FD is not opened by crosvm and returns a FD that is freshly DUPFD_CLOEXEC's.
544/// A SafeDescriptor is created from the duplicated fd. It does not take ownership of
545/// fd passed by argument.
546pub fn safe_descriptor_from_cmdline_fd(fd: &RawFd) -> Result<SafeDescriptor> {
547    let validated_fd = validate_raw_fd(fd)?;
548    Ok(
549        // SAFETY:
550        // Safe because nothing else has access to validated_fd after this call.
551        unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
552    )
553}
554
555/// Open the file with the given path, or if it is of the form `/proc/self/fd/N` then just use the
556/// file descriptor.
557///
558/// Note that this will not work properly if the same `/proc/self/fd/N` path is used twice in
559/// different places, as the metadata (including the offset) will be shared between both file
560/// descriptors.
561pub fn open_file_or_duplicate<P: AsRef<Path>>(path: P, options: &OpenOptions) -> Result<File> {
562    let path = path.as_ref();
563    // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
564    Ok(if let Some(fd) = safe_descriptor_from_path(path)? {
565        fd.into()
566    } else {
567        options.open(path)?
568    })
569}
570
571/// Get the soft and hard limits of max number of open files allowed by the environment.
572pub fn max_open_files() -> Result<libc::rlimit64> {
573    let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
574
575    // SAFETY:
576    // Safe because this will only modify `buf` and we check the return value.
577    let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
578    if res == 0 {
579        // SAFETY:
580        // Safe because the kernel guarantees that the struct is fully initialized.
581        let limit = unsafe { buf.assume_init() };
582        Ok(limit)
583    } else {
584        errno_result()
585    }
586}
587
588/// Executes the given callback with extended soft limit of max number of open files. After the
589/// callback executed, restore the limit.
590pub fn call_with_extended_max_files<T, E>(
591    callback: impl FnOnce() -> std::result::Result<T, E>,
592) -> Result<std::result::Result<T, E>> {
593    let cur_limit = max_open_files()?;
594    let new_limit = libc::rlimit64 {
595        rlim_cur: cur_limit.rlim_max,
596        ..cur_limit
597    };
598    let needs_extension = cur_limit.rlim_cur < new_limit.rlim_cur;
599    if needs_extension {
600        set_max_open_files(new_limit)?;
601    }
602
603    let r = callback();
604
605    // Restore the soft limit.
606    if needs_extension {
607        set_max_open_files(cur_limit)?;
608    }
609
610    Ok(r)
611}
612
613/// Set the soft and hard limits of max number of open files to the given value.
614fn set_max_open_files(limit: libc::rlimit64) -> Result<()> {
615    // SAFETY: RLIMIT_NOFILE is known only to read a buffer of size rlimit64, and we have always
616    // rlimit64 allocated.
617    let res = unsafe { libc::setrlimit64(libc::RLIMIT_NOFILE, &limit) };
618    if res == 0 {
619        Ok(())
620    } else {
621        errno_result()
622    }
623}
624
625/// Moves the requested PID/TID to a particular cgroup
626pub fn move_to_cgroup(cgroup_path: PathBuf, id_to_write: Pid, cgroup_file: &str) -> Result<()> {
627    use std::io::Write;
628
629    let gpu_cgroup_file = cgroup_path.join(cgroup_file);
630    let mut f = File::create(gpu_cgroup_file)?;
631    f.write_all(id_to_write.to_string().as_bytes())?;
632    Ok(())
633}
634
/// Moves the thread `thread_id` into the cgroup at `cgroup_path` by writing its id to the
/// cgroup's `tasks` file.
pub fn move_task_to_cgroup(cgroup_path: PathBuf, thread_id: Pid) -> Result<()> {
    move_to_cgroup(cgroup_path, thread_id, "tasks")
}
638
/// Moves the process `process_id` into the cgroup at `cgroup_path` by writing its id to the
/// cgroup's `cgroup.procs` file.
pub fn move_proc_to_cgroup(cgroup_path: PathBuf, process_id: Pid) -> Result<()> {
    move_to_cgroup(cgroup_path, process_id, "cgroup.procs")
}
642
643/// Queries the property of a specified CPU sysfs node.
644fn parse_sysfs_cpu_info_vec(cpu_id: usize, property: &str) -> Result<Vec<u32>> {
645    let path = format!("/sys/devices/system/cpu/cpu{cpu_id}/{property}");
646    let res: Result<Vec<_>> = std::fs::read_to_string(path)?
647        .split_whitespace()
648        .map(|x| x.parse().map_err(|_| Error::new(libc::EINVAL)))
649        .collect();
650    res
651}
652
/// Returns a list of supported frequencies in kHz for a given logical core.
///
/// The values are read from the core's `cpufreq/scaling_available_frequencies` sysfs node.
pub fn logical_core_frequencies_khz(cpu_id: usize) -> Result<Vec<u32>> {
    parse_sysfs_cpu_info_vec(cpu_id, "cpufreq/scaling_available_frequencies")
}
657
658fn parse_sysfs_cpu_info(cpu_id: usize, property: &str) -> Result<u32> {
659    let path = format!("/sys/devices/system/cpu/cpu{cpu_id}/{property}");
660    std::fs::read_to_string(path)?
661        .trim()
662        .parse()
663        .map_err(|_| Error::new(libc::EINVAL))
664}
665
666/// Returns the capacity (measure of performance) of a given logical core.
667pub fn logical_core_capacity(cpu_id: usize) -> Result<u32> {
668    static CPU_MAX_FREQS: OnceLock<Option<Vec<u32>>> = OnceLock::new();
669
670    let cpu_capacity = parse_sysfs_cpu_info(cpu_id, "cpu_capacity")?;
671
672    // Collect and cache the maximum frequencies of all cores. We need to know
673    // the largest maximum frequency between all cores to reverse normalization,
674    // so collect all the values once on the first call to this function.
675    let cpu_max_freqs = CPU_MAX_FREQS.get_or_init(|| {
676        (0..number_of_logical_cores().ok()?)
677            .map(|cpu_id| logical_core_max_freq_khz(cpu_id).ok())
678            .collect()
679    });
680
681    if let Some(cpu_max_freqs) = cpu_max_freqs {
682        let largest_max_freq = *cpu_max_freqs.iter().max().ok_or(Error::new(EINVAL))?;
683        let cpu_max_freq = *cpu_max_freqs.get(cpu_id).ok_or(Error::new(EINVAL))?;
684        let normalized_cpu_capacity = (u64::from(cpu_capacity) * u64::from(largest_max_freq))
685            .checked_div(u64::from(cpu_max_freq))
686            .ok_or(Error::new(EINVAL))?;
687        normalized_cpu_capacity
688            .try_into()
689            .map_err(|_| Error::new(EINVAL))
690    } else {
691        // cpu-freq is not enabled. Fall back to using the normalized capacity.
692        Ok(cpu_capacity)
693    }
694}
695
/// Returns the cluster ID of a given logical core.
///
/// The value is read from the core's `topology/physical_package_id` sysfs node.
pub fn logical_core_cluster_id(cpu_id: usize) -> Result<u32> {
    parse_sysfs_cpu_info(cpu_id, "topology/physical_package_id")
}
700
/// Returns the maximum frequency (in kHz) of a given logical core.
///
/// The value is read from the core's `cpufreq/cpuinfo_max_freq` sysfs node.
pub fn logical_core_max_freq_khz(cpu_id: usize) -> Result<u32> {
    parse_sysfs_cpu_info(cpu_id, "cpufreq/cpuinfo_max_freq")
}
705
/// Scheduling attributes passed by pointer to the `sched_setattr` syscall.
///
/// The layout is `#[repr(C)]` because the kernel reads this structure directly.
// NOTE(review): field meanings presumably follow sched_setattr(2) — confirm against the man page.
#[repr(C)]
pub struct sched_attr {
    /// Size of this structure in bytes; `Default` fills it in.
    pub size: u32,

    pub sched_policy: u32,
    pub sched_flags: u64,
    pub sched_nice: i32,

    pub sched_priority: u32,

    pub sched_runtime: u64,
    pub sched_deadline: u64,
    pub sched_period: u64,

    pub sched_util_min: u32,
    pub sched_util_max: u32,
}

/// Produces an all-zero attribute set whose `size` field is the size of the structure.
impl Default for sched_attr {
    fn default() -> Self {
        let struct_size = mem::size_of::<Self>() as u32;
        Self {
            size: struct_size,
            sched_policy: 0,
            sched_flags: 0,
            sched_nice: 0,
            sched_priority: 0,
            sched_runtime: 0,
            sched_deadline: 0,
            sched_period: 0,
            sched_util_min: 0,
            sched_util_max: 0,
        }
    }
}
740
741pub fn sched_setattr(pid: Pid, attr: &mut sched_attr, flags: u32) -> Result<()> {
742    // SAFETY: Safe becuase all the args are valid and the return valud is checked.
743    let ret = unsafe {
744        libc::syscall(
745            libc::SYS_sched_setattr,
746            pid as usize,
747            attr as *mut sched_attr as usize,
748            flags as usize,
749        )
750    };
751
752    if ret < 0 {
753        return Err(Error::last());
754    }
755    Ok(())
756}
757
#[cfg(test)]
mod tests {
    use std::io::Write;
    use std::os::fd::AsRawFd;

    use super::*;
    use crate::unix::add_fd_flags;

    /// Checks that the write end returned by `new_pipe_full` has no room left for more data.
    #[test]
    fn pipe_size_and_fill() {
        let (_rx, mut tx) = new_pipe_full().expect("Failed to pipe");

        // To check that setting the size worked, set the descriptor to non-blocking and check
        // that write returns an error.
        add_fd_flags(tx.as_raw_fd(), libc::O_NONBLOCK).expect("Failed to set tx non blocking");
        tx.write(&[0u8; 8])
            .expect_err("Write after fill didn't fail");
    }
}