1#[cfg(target_os = "android")]
8mod android;
9#[cfg(target_os = "android")]
10use android as target_os;
11#[cfg(target_os = "linux")]
12#[allow(clippy::module_inception)]
13mod linux;
14#[cfg(target_os = "linux")]
15use linux as target_os;
16use log::warn;
17#[macro_use]
18pub mod ioctl;
19#[macro_use]
20pub mod syslog;
21mod acpi_event;
22mod capabilities;
23mod descriptor;
24mod event;
25mod file;
26mod file_traits;
27mod mmap;
28mod net;
29mod netlink;
30mod notifiers;
31pub mod platform_timer_resolution;
32mod poll;
33mod priority;
34mod sched;
35mod shm;
36pub mod signal;
37mod signalfd;
38mod terminal;
39mod timer;
40pub mod vsock;
41mod write_zeroes;
42
43use std::ffi::CString;
44use std::fs::remove_file;
45use std::fs::File;
46use std::fs::OpenOptions;
47use std::mem;
48use std::mem::MaybeUninit;
49use std::ops::Deref;
50use std::os::unix::io::FromRawFd;
51use std::os::unix::io::RawFd;
52use std::os::unix::net::UnixDatagram;
53use std::os::unix::net::UnixListener;
54use std::os::unix::process::ExitStatusExt;
55use std::path::Path;
56use std::path::PathBuf;
57use std::process::ExitStatus;
58use std::ptr;
59use std::sync::OnceLock;
60use std::time::Duration;
61
62pub use acpi_event::*;
63pub use capabilities::drop_capabilities;
64pub use event::EventExt;
65pub(crate) use event::PlatformEvent;
66pub use file::find_next_data;
67pub use file::FileDataIterator;
68pub(crate) use file_traits::lib::*;
69pub use ioctl::*;
70use libc::c_int;
71use libc::c_long;
72use libc::fcntl;
73use libc::pipe2;
74use libc::prctl;
75use libc::syscall;
76use libc::waitpid;
77use libc::SYS_getpid;
78use libc::SYS_getppid;
79use libc::SYS_gettid;
80use libc::EINVAL;
81use libc::O_CLOEXEC;
82use libc::PR_SET_NAME;
83use libc::SIGKILL;
84use libc::WNOHANG;
85pub use mmap::*;
86pub(in crate::sys) use net::sendmsg_nosignal as sendmsg;
87pub(in crate::sys) use net::sockaddr_un;
88pub(in crate::sys) use net::sockaddrv4_to_lib_c;
89pub(in crate::sys) use net::sockaddrv6_to_lib_c;
90pub use netlink::*;
91pub use poll::EventContext;
92pub use priority::*;
93pub use sched::*;
94pub use shm::MemfdSeals;
95pub use shm::SharedMemoryLinux;
96pub use signal::*;
97pub use signalfd::Error as SignalFdError;
98pub use signalfd::*;
99pub use terminal::*;
100pub(crate) use write_zeroes::file_punch_hole;
101pub(crate) use write_zeroes::file_write_zeroes_at;
102
103use crate::descriptor::FromRawDescriptor;
104use crate::descriptor::SafeDescriptor;
105pub use crate::errno::Error;
106pub use crate::errno::Result;
107pub use crate::errno::*;
108use crate::number_of_logical_cores;
109use crate::round_up_to_page_size;
110pub use crate::sys::unix::descriptor::*;
111use crate::syscall;
112use crate::AsRawDescriptor;
113use crate::Pid;
114
/// User ID type; alias for `libc::uid_t`.
pub type Uid = libc::uid_t;
/// Group ID type; alias for `libc::gid_t`.
pub type Gid = libc::gid_t;
/// File mode type; alias for `libc::mode_t`.
pub type Mode = libc::mode_t;
119
120#[inline(always)]
122pub fn set_thread_name(name: &str) -> Result<()> {
123 let name = CString::new(name).or(Err(Error::new(EINVAL)))?;
124 let ret = unsafe { prctl(PR_SET_NAME, name.as_c_str()) };
126 if ret == 0 {
127 Ok(())
128 } else {
129 errno_result()
130 }
131}
132
133#[inline(always)]
136pub fn getpid() -> Pid {
137 unsafe { syscall(SYS_getpid as c_long) as Pid }
140}
141
142#[inline(always)]
144pub fn getppid() -> Pid {
145 unsafe { syscall(SYS_getppid as c_long) as Pid }
148}
149
150pub fn gettid() -> Pid {
152 unsafe { syscall(SYS_gettid as c_long) as Pid }
155}
156
157#[inline(always)]
159pub fn geteuid() -> Uid {
160 unsafe { libc::geteuid() }
163}
164
165#[inline(always)]
167pub fn getegid() -> Gid {
168 unsafe { libc::getegid() }
171}
172
/// The lock operation requested from `flock`.
pub enum FlockOperation {
    /// Translated to `libc::LOCK_SH` by `flock`.
    LockShared,
    /// Translated to `libc::LOCK_EX` by `flock`.
    LockExclusive,
    /// Translated to `libc::LOCK_UN` by `flock`.
    Unlock,
}
179
180#[inline(always)]
183pub fn flock<F: AsRawDescriptor>(file: &F, op: FlockOperation, nonblocking: bool) -> Result<()> {
184 let mut operation = match op {
185 FlockOperation::LockShared => libc::LOCK_SH,
186 FlockOperation::LockExclusive => libc::LOCK_EX,
187 FlockOperation::Unlock => libc::LOCK_UN,
188 };
189
190 if nonblocking {
191 operation |= libc::LOCK_NB;
192 }
193
194 syscall!(unsafe { libc::flock(file.as_raw_descriptor(), operation) }).map(|_| ())
197}
198
/// The operation to perform with `fallocate`.
///
/// The `From<FallocateMode>` conversions in this file always include
/// `libc::FALLOC_FL_KEEP_SIZE` in the resulting flag set.
pub enum FallocateMode {
    /// Converted to `FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE`.
    PunchHole,
    /// Converted to `FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE`.
    ZeroRange,
    /// Converted to `FALLOC_FL_KEEP_SIZE` alone.
    Allocate,
}
205
206impl From<FallocateMode> for i32 {
207 fn from(value: FallocateMode) -> Self {
208 match value {
209 FallocateMode::Allocate => libc::FALLOC_FL_KEEP_SIZE,
210 FallocateMode::PunchHole => libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
211 FallocateMode::ZeroRange => libc::FALLOC_FL_ZERO_RANGE | libc::FALLOC_FL_KEEP_SIZE,
212 }
213 }
214}
215
216impl From<FallocateMode> for u32 {
217 fn from(value: FallocateMode) -> Self {
218 Into::<i32>::into(value) as u32
219 }
220}
221
222pub fn fallocate<F: AsRawDescriptor>(
224 file: &F,
225 mode: FallocateMode,
226 offset: u64,
227 len: u64,
228) -> Result<()> {
229 let offset = if offset > libc::off64_t::MAX as u64 {
230 return Err(Error::new(libc::EINVAL));
231 } else {
232 offset as libc::off64_t
233 };
234
235 let len = if len > libc::off64_t::MAX as u64 {
236 return Err(Error::new(libc::EINVAL));
237 } else {
238 len as libc::off64_t
239 };
240
241 syscall!(unsafe { libc::fallocate64(file.as_raw_descriptor(), mode.into(), offset, len) })
245 .map(|_| ())
246}
247
248pub fn fstat<F: AsRawDescriptor>(f: &F) -> Result<libc::stat64> {
250 let mut st = MaybeUninit::<libc::stat64>::zeroed();
251
252 syscall!(unsafe { libc::fstat64(f.as_raw_descriptor(), st.as_mut_ptr()) })?;
256
257 Ok(unsafe { st.assume_init() })
260}
261
262pub fn is_block_file<F: AsRawDescriptor>(file: &F) -> Result<bool> {
264 let stat = fstat(file)?;
265 Ok((stat.st_mode & libc::S_IFMT) == libc::S_IFBLK)
266}
267
// ioctl type value used to build the `BLKDISCARD` request number below.
const BLOCK_IO_TYPE: u32 = 0x12;
// Defines `BLKDISCARD` from type `BLOCK_IO_TYPE` and request number 119.
ioctl_io_nr!(BLKDISCARD, BLOCK_IO_TYPE, 119);
270
271pub fn discard_block<F: AsRawDescriptor>(file: &F, offset: u64, len: u64) -> Result<()> {
273 let range: [u64; 2] = [offset, len];
274 syscall!(unsafe { libc::ioctl(file.as_raw_descriptor(), BLKDISCARD, &range) }).map(|_| ())
281}
282
/// A trait for types that can be converted into a raw `Pid`.
pub trait AsRawPid {
    /// Returns the raw process ID for this value.
    fn as_raw_pid(&self) -> Pid;
}
287
impl AsRawPid for Pid {
    /// A `Pid` is already raw: returns a copy of itself.
    fn as_raw_pid(&self) -> Pid {
        *self
    }
}
293
294impl AsRawPid for std::process::Child {
295 fn as_raw_pid(&self) -> Pid {
296 self.id() as Pid
297 }
298}
299
300pub fn wait_for_pid<A: AsRawPid>(pid: A, options: c_int) -> Result<(Option<Pid>, ExitStatus)> {
307 let pid = pid.as_raw_pid();
308 let mut status: c_int = 1;
309 let ret = unsafe { libc::waitpid(pid, &mut status, options) };
312 if ret < 0 {
313 return errno_result();
314 }
315 Ok((
316 if ret == 0 { None } else { Some(ret) },
317 ExitStatus::from_raw(status),
318 ))
319}
320
321pub fn reap_child() -> Result<Pid> {
347 let ret = unsafe { waitpid(-1, ptr::null_mut(), WNOHANG) };
350 if ret == -1 {
351 errno_result()
352 } else {
353 Ok(ret)
354 }
355}
356
357pub fn kill_process_group() -> Result<()> {
362 unsafe { kill(0, SIGKILL) }?;
364 unreachable!();
366}
367
368pub fn pipe() -> Result<(File, File)> {
372 let mut pipe_fds = [-1; 2];
373 let ret = unsafe { pipe2(&mut pipe_fds[0], O_CLOEXEC) };
377 if ret == -1 {
378 errno_result()
379 } else {
380 Ok(unsafe {
384 (
385 File::from_raw_fd(pipe_fds[0]),
386 File::from_raw_fd(pipe_fds[1]),
387 )
388 })
389 }
390}
391
392pub fn set_pipe_size(fd: RawFd, size: usize) -> Result<usize> {
396 syscall!(unsafe { fcntl(fd, libc::F_SETPIPE_SZ, size as c_int) }).map(|ret| ret as usize)
399}
400
401pub fn new_pipe_full() -> Result<(File, File)> {
405 use std::io::Write;
406
407 let (rx, mut tx) = pipe()?;
408 let page_size = set_pipe_size(tx.as_raw_descriptor(), round_up_to_page_size(1))?;
410
411 let buf = vec![0u8; page_size];
413 tx.write_all(&buf)?;
414
415 Ok((rx, tx))
416}
417
418pub struct UnlinkUnixDatagram(pub UnixDatagram);
420impl AsRef<UnixDatagram> for UnlinkUnixDatagram {
421 fn as_ref(&self) -> &UnixDatagram {
422 &self.0
423 }
424}
425impl Drop for UnlinkUnixDatagram {
426 fn drop(&mut self) {
427 if let Ok(addr) = self.0.local_addr() {
428 if let Some(path) = addr.as_pathname() {
429 if let Err(e) = remove_file(path) {
430 warn!("failed to remove control socket file: {}", e);
431 }
432 }
433 }
434 }
435}
436
437pub struct UnlinkUnixListener(pub UnixListener);
439
440impl AsRef<UnixListener> for UnlinkUnixListener {
441 fn as_ref(&self) -> &UnixListener {
442 &self.0
443 }
444}
445
446impl Deref for UnlinkUnixListener {
447 type Target = UnixListener;
448
449 fn deref(&self) -> &UnixListener {
450 &self.0
451 }
452}
453
454impl Drop for UnlinkUnixListener {
455 fn drop(&mut self) {
456 if let Ok(addr) = self.0.local_addr() {
457 if let Some(path) = addr.as_pathname() {
458 if let Err(e) = remove_file(path) {
459 warn!("failed to remove control socket file: {}", e);
460 }
461 }
462 }
463 }
464}
465
466pub fn validate_raw_descriptor(raw_descriptor: RawDescriptor) -> Result<RawDescriptor> {
469 validate_raw_fd(&raw_descriptor)
470}
471
472pub fn validate_raw_fd(raw_fd: &RawFd) -> Result<RawFd> {
475 let flags = unsafe { libc::fcntl(*raw_fd, libc::F_GETFD) };
480 if flags < 0 || (flags & libc::FD_CLOEXEC) != 0 {
481 return Err(Error::new(libc::EBADF));
482 }
483
484 let dup_fd = unsafe { libc::fcntl(*raw_fd, libc::F_DUPFD_CLOEXEC, 0) };
489 if dup_fd < 0 {
490 return Err(Error::last());
491 }
492 Ok(dup_fd as RawFd)
493}
494
495pub fn poll_in<F: AsRawDescriptor>(fd: &F) -> bool {
500 let mut fds = libc::pollfd {
501 fd: fd.as_raw_descriptor(),
502 events: libc::POLLIN,
503 revents: 0,
504 };
505 let ret = unsafe { libc::poll(&mut fds, 1, 0) };
508 if ret == -1 {
512 return false;
513 }
514 fds.revents & libc::POLLIN != 0
515}
516
517pub fn max_timeout() -> Duration {
519 Duration::new(libc::time_t::MAX as u64, 999999999)
520}
521
522pub fn safe_descriptor_from_path<P: AsRef<Path>>(path: P) -> Result<Option<SafeDescriptor>> {
525 let path = path.as_ref();
526 if path.parent() == Some(Path::new("/proc/self/fd")) {
527 let raw_descriptor = path
528 .file_name()
529 .and_then(|fd_osstr| fd_osstr.to_str())
530 .and_then(|fd_str| fd_str.parse::<RawFd>().ok())
531 .ok_or_else(|| Error::new(EINVAL))?;
532 let validated_fd = validate_raw_fd(&raw_descriptor)?;
533 Ok(Some(
534 unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
537 ))
538 } else {
539 Ok(None)
540 }
541}
542
543pub fn safe_descriptor_from_cmdline_fd(fd: &RawFd) -> Result<SafeDescriptor> {
547 let validated_fd = validate_raw_fd(fd)?;
548 Ok(
549 unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
552 )
553}
554
555pub fn open_file_or_duplicate<P: AsRef<Path>>(path: P, options: &OpenOptions) -> Result<File> {
562 let path = path.as_ref();
563 Ok(if let Some(fd) = safe_descriptor_from_path(path)? {
565 fd.into()
566 } else {
567 options.open(path)?
568 })
569}
570
571pub fn max_open_files() -> Result<libc::rlimit64> {
573 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
574
575 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
578 if res == 0 {
579 let limit = unsafe { buf.assume_init() };
582 Ok(limit)
583 } else {
584 errno_result()
585 }
586}
587
588pub fn call_with_extended_max_files<T, E>(
591 callback: impl FnOnce() -> std::result::Result<T, E>,
592) -> Result<std::result::Result<T, E>> {
593 let cur_limit = max_open_files()?;
594 let new_limit = libc::rlimit64 {
595 rlim_cur: cur_limit.rlim_max,
596 ..cur_limit
597 };
598 let needs_extension = cur_limit.rlim_cur < new_limit.rlim_cur;
599 if needs_extension {
600 set_max_open_files(new_limit)?;
601 }
602
603 let r = callback();
604
605 if needs_extension {
607 set_max_open_files(cur_limit)?;
608 }
609
610 Ok(r)
611}
612
613fn set_max_open_files(limit: libc::rlimit64) -> Result<()> {
615 let res = unsafe { libc::setrlimit64(libc::RLIMIT_NOFILE, &limit) };
618 if res == 0 {
619 Ok(())
620 } else {
621 errno_result()
622 }
623}
624
625pub fn move_to_cgroup(cgroup_path: PathBuf, id_to_write: Pid, cgroup_file: &str) -> Result<()> {
627 use std::io::Write;
628
629 let gpu_cgroup_file = cgroup_path.join(cgroup_file);
630 let mut f = File::create(gpu_cgroup_file)?;
631 f.write_all(id_to_write.to_string().as_bytes())?;
632 Ok(())
633}
634
635pub fn move_task_to_cgroup(cgroup_path: PathBuf, thread_id: Pid) -> Result<()> {
636 move_to_cgroup(cgroup_path, thread_id, "tasks")
637}
638
639pub fn move_proc_to_cgroup(cgroup_path: PathBuf, process_id: Pid) -> Result<()> {
640 move_to_cgroup(cgroup_path, process_id, "cgroup.procs")
641}
642
643fn parse_sysfs_cpu_info_vec(cpu_id: usize, property: &str) -> Result<Vec<u32>> {
645 let path = format!("/sys/devices/system/cpu/cpu{cpu_id}/{property}");
646 let res: Result<Vec<_>> = std::fs::read_to_string(path)?
647 .split_whitespace()
648 .map(|x| x.parse().map_err(|_| Error::new(libc::EINVAL)))
649 .collect();
650 res
651}
652
653pub fn logical_core_frequencies_khz(cpu_id: usize) -> Result<Vec<u32>> {
655 parse_sysfs_cpu_info_vec(cpu_id, "cpufreq/scaling_available_frequencies")
656}
657
658fn parse_sysfs_cpu_info(cpu_id: usize, property: &str) -> Result<u32> {
659 let path = format!("/sys/devices/system/cpu/cpu{cpu_id}/{property}");
660 std::fs::read_to_string(path)?
661 .trim()
662 .parse()
663 .map_err(|_| Error::new(libc::EINVAL))
664}
665
666pub fn logical_core_capacity(cpu_id: usize) -> Result<u32> {
668 static CPU_MAX_FREQS: OnceLock<Option<Vec<u32>>> = OnceLock::new();
669
670 let cpu_capacity = parse_sysfs_cpu_info(cpu_id, "cpu_capacity")?;
671
672 let cpu_max_freqs = CPU_MAX_FREQS.get_or_init(|| {
676 (0..number_of_logical_cores().ok()?)
677 .map(|cpu_id| logical_core_max_freq_khz(cpu_id).ok())
678 .collect()
679 });
680
681 if let Some(cpu_max_freqs) = cpu_max_freqs {
682 let largest_max_freq = *cpu_max_freqs.iter().max().ok_or(Error::new(EINVAL))?;
683 let cpu_max_freq = *cpu_max_freqs.get(cpu_id).ok_or(Error::new(EINVAL))?;
684 let normalized_cpu_capacity = (u64::from(cpu_capacity) * u64::from(largest_max_freq))
685 .checked_div(u64::from(cpu_max_freq))
686 .ok_or(Error::new(EINVAL))?;
687 normalized_cpu_capacity
688 .try_into()
689 .map_err(|_| Error::new(EINVAL))
690 } else {
691 Ok(cpu_capacity)
693 }
694}
695
696pub fn logical_core_cluster_id(cpu_id: usize) -> Result<u32> {
698 parse_sysfs_cpu_info(cpu_id, "topology/physical_package_id")
699}
700
701pub fn logical_core_max_freq_khz(cpu_id: usize) -> Result<u32> {
703 parse_sysfs_cpu_info(cpu_id, "cpufreq/cpuinfo_max_freq")
704}
705
/// C-layout argument block handed to the `sched_setattr` system call by the `sched_setattr`
/// wrapper in this file.
///
/// NOTE(review): the field names follow the kernel's `struct sched_attr`; see sched_setattr(2)
/// for the meaning of each field.
#[repr(C)]
pub struct sched_attr {
    /// Size of this structure in bytes; `Default` fills it in.
    pub size: u32,

    pub sched_policy: u32,
    pub sched_flags: u64,
    pub sched_nice: i32,

    pub sched_priority: u32,

    pub sched_runtime: u64,
    pub sched_deadline: u64,
    pub sched_period: u64,

    pub sched_util_min: u32,
    pub sched_util_max: u32,
}

impl Default for sched_attr {
    /// Returns a structure whose `size` is `size_of::<sched_attr>()` and whose other fields
    /// are all zero.
    fn default() -> Self {
        let size = std::mem::size_of::<Self>() as u32;
        Self {
            size,
            sched_policy: 0,
            sched_flags: 0,
            sched_nice: 0,
            sched_priority: 0,
            sched_runtime: 0,
            sched_deadline: 0,
            sched_period: 0,
            sched_util_min: 0,
            sched_util_max: 0,
        }
    }
}
740
741pub fn sched_setattr(pid: Pid, attr: &mut sched_attr, flags: u32) -> Result<()> {
742 let ret = unsafe {
744 libc::syscall(
745 libc::SYS_sched_setattr,
746 pid as usize,
747 attr as *mut sched_attr as usize,
748 flags as usize,
749 )
750 };
751
752 if ret < 0 {
753 return Err(Error::last());
754 }
755 Ok(())
756}
757
#[cfg(test)]
mod tests {
    use std::io::Write;
    use std::os::fd::AsRawFd;

    use super::*;
    use crate::unix::add_fd_flags;

    /// A pipe returned by `new_pipe_full` must reject a further non-blocking write.
    #[test]
    fn pipe_size_and_fill() {
        // `_rx` is kept alive so the pipe still has its receiving end during the write.
        let (_rx, mut tx) = new_pipe_full().expect("Failed to pipe");

        // Switch to non-blocking so the write on the full pipe errors instead of hanging.
        add_fd_flags(tx.as_raw_fd(), libc::O_NONBLOCK).expect("Failed to set tx non blocking");

        let extra = [0u8; 8];
        let outcome = tx.write(&extra);
        outcome.expect_err("Write after fill didn't fail");
    }
}