1#[cfg(target_os = "android")]
8mod android;
9#[cfg(target_os = "android")]
10use android as target_os;
11#[cfg(target_os = "linux")]
12#[allow(clippy::module_inception)]
13mod linux;
14#[cfg(target_os = "linux")]
15use linux as target_os;
16use log::warn;
17#[macro_use]
18pub mod ioctl;
19#[macro_use]
20pub mod syslog;
21mod acpi_event;
22mod capabilities;
23mod descriptor;
24mod event;
25mod file;
26mod file_traits;
27mod mmap;
28mod net;
29mod netlink;
30mod notifiers;
31pub mod platform_timer_resolution;
32mod poll;
33mod priority;
34mod sched;
35mod shm;
36pub mod signal;
37mod signalfd;
38mod terminal;
39mod timer;
40pub mod vsock;
41mod write_zeroes;
42
43use std::ffi::CString;
44use std::fs::remove_file;
45use std::fs::File;
46use std::fs::OpenOptions;
47use std::mem;
48use std::mem::MaybeUninit;
49use std::ops::Deref;
50use std::os::unix::io::FromRawFd;
51use std::os::unix::io::RawFd;
52use std::os::unix::net::UnixDatagram;
53use std::os::unix::net::UnixListener;
54use std::os::unix::process::ExitStatusExt;
55use std::path::Path;
56use std::path::PathBuf;
57use std::process::ExitStatus;
58use std::ptr;
59use std::sync::OnceLock;
60use std::time::Duration;
61
62pub use acpi_event::*;
63pub use capabilities::drop_capabilities;
64pub use event::EventExt;
65pub(crate) use event::PlatformEvent;
66pub use file::find_next_data;
67pub use file::FileDataIterator;
68pub(crate) use file_traits::lib::*;
69pub use ioctl::*;
70use libc::c_int;
71use libc::c_long;
72use libc::fcntl;
73use libc::pipe2;
74use libc::prctl;
75use libc::syscall;
76use libc::waitpid;
77use libc::SYS_getpid;
78use libc::SYS_getppid;
79use libc::SYS_gettid;
80use libc::EINVAL;
81use libc::O_CLOEXEC;
82use libc::PR_SET_NAME;
83use libc::SIGKILL;
84use libc::WNOHANG;
85pub use mmap::*;
86pub(in crate::sys) use net::sendmsg_nosignal as sendmsg;
87pub(in crate::sys) use net::sockaddr_un;
88pub(in crate::sys) use net::sockaddrv4_to_lib_c;
89pub(in crate::sys) use net::sockaddrv6_to_lib_c;
90pub use netlink::*;
91pub use poll::EventContext;
92pub use priority::*;
93pub use sched::*;
94pub use shm::MemfdSeals;
95pub use shm::SharedMemoryLinux;
96pub use signal::*;
97pub use signalfd::Error as SignalFdError;
98pub use signalfd::*;
99pub use terminal::*;
100pub(crate) use write_zeroes::file_punch_hole;
101pub(crate) use write_zeroes::file_write_zeroes_at;
102
103use crate::descriptor::FromRawDescriptor;
104use crate::descriptor::SafeDescriptor;
105pub use crate::errno::Error;
106pub use crate::errno::Result;
107pub use crate::errno::*;
108use crate::number_of_logical_cores;
109use crate::round_up_to_page_size;
110pub use crate::sys::unix::descriptor::*;
111use crate::syscall;
112use crate::AsRawDescriptor;
113use crate::Pid;
114
/// User identifier type, an alias for libc's `uid_t`.
pub type Uid = libc::uid_t;
/// Group identifier type, an alias for libc's `gid_t`.
pub type Gid = libc::gid_t;
/// File mode type, an alias for libc's `mode_t`.
pub type Mode = libc::mode_t;

/// Sysfs directory under which the per-CPU `cpu<N>` property directories are looked up.
const CPU_DIR: &str = "/sys/devices/system/cpu";
122
123#[inline(always)]
125pub fn set_thread_name(name: &str) -> Result<()> {
126 let name = CString::new(name).or(Err(Error::new(EINVAL)))?;
127 let ret = unsafe { prctl(PR_SET_NAME, name.as_c_str()) };
129 if ret == 0 {
130 Ok(())
131 } else {
132 errno_result()
133 }
134}
135
136#[inline(always)]
139pub fn getpid() -> Pid {
140 unsafe { syscall(SYS_getpid as c_long) as Pid }
143}
144
145#[inline(always)]
147pub fn getppid() -> Pid {
148 unsafe { syscall(SYS_getppid as c_long) as Pid }
151}
152
153pub fn gettid() -> Pid {
155 unsafe { syscall(SYS_gettid as c_long) as Pid }
158}
159
160#[inline(always)]
162pub fn geteuid() -> Uid {
163 unsafe { libc::geteuid() }
166}
167
168#[inline(always)]
170pub fn getegid() -> Gid {
171 unsafe { libc::getegid() }
174}
175
/// The kind of lock operation to request from [`flock`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FlockOperation {
    /// Acquire a shared lock (`LOCK_SH`).
    LockShared,
    /// Acquire an exclusive lock (`LOCK_EX`).
    LockExclusive,
    /// Release a held lock (`LOCK_UN`).
    Unlock,
}
182
183#[inline(always)]
186pub fn flock<F: AsRawDescriptor>(file: &F, op: FlockOperation, nonblocking: bool) -> Result<()> {
187 let mut operation = match op {
188 FlockOperation::LockShared => libc::LOCK_SH,
189 FlockOperation::LockExclusive => libc::LOCK_EX,
190 FlockOperation::Unlock => libc::LOCK_UN,
191 };
192
193 if nonblocking {
194 operation |= libc::LOCK_NB;
195 }
196
197 syscall!(unsafe { libc::flock(file.as_raw_descriptor(), operation) }).map(|_| ())
200}
201
/// The operation to perform with [`fallocate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FallocateMode {
    /// Deallocate the byte range (`FALLOC_FL_PUNCH_HOLE`).
    PunchHole,
    /// Zero the byte range (`FALLOC_FL_ZERO_RANGE`).
    ZeroRange,
    /// Allocate the byte range.
    Allocate,
}
208
209impl From<FallocateMode> for i32 {
210 fn from(value: FallocateMode) -> Self {
211 match value {
212 FallocateMode::Allocate => libc::FALLOC_FL_KEEP_SIZE,
213 FallocateMode::PunchHole => libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
214 FallocateMode::ZeroRange => libc::FALLOC_FL_ZERO_RANGE | libc::FALLOC_FL_KEEP_SIZE,
215 }
216 }
217}
218
219impl From<FallocateMode> for u32 {
220 fn from(value: FallocateMode) -> Self {
221 Into::<i32>::into(value) as u32
222 }
223}
224
225pub fn fallocate<F: AsRawDescriptor>(
227 file: &F,
228 mode: FallocateMode,
229 offset: u64,
230 len: u64,
231) -> Result<()> {
232 let offset = if offset > libc::off64_t::MAX as u64 {
233 return Err(Error::new(libc::EINVAL));
234 } else {
235 offset as libc::off64_t
236 };
237
238 let len = if len > libc::off64_t::MAX as u64 {
239 return Err(Error::new(libc::EINVAL));
240 } else {
241 len as libc::off64_t
242 };
243
244 syscall!(unsafe { libc::fallocate64(file.as_raw_descriptor(), mode.into(), offset, len) })
248 .map(|_| ())
249}
250
251pub fn fstat<F: AsRawDescriptor>(f: &F) -> Result<libc::stat64> {
253 let mut st = MaybeUninit::<libc::stat64>::zeroed();
254
255 syscall!(unsafe { libc::fstat64(f.as_raw_descriptor(), st.as_mut_ptr()) })?;
259
260 Ok(unsafe { st.assume_init() })
263}
264
265pub fn is_block_file<F: AsRawDescriptor>(file: &F) -> Result<bool> {
267 let stat = fstat(file)?;
268 Ok((stat.st_mode & libc::S_IFMT) == libc::S_IFBLK)
269}
270
271const BLOCK_IO_TYPE: u32 = 0x12;
272ioctl_io_nr!(BLKDISCARD, BLOCK_IO_TYPE, 119);
273
274pub fn discard_block<F: AsRawDescriptor>(file: &F, offset: u64, len: u64) -> Result<()> {
276 let range: [u64; 2] = [offset, len];
277 syscall!(unsafe { libc::ioctl(file.as_raw_descriptor(), BLKDISCARD, &range) }).map(|_| ())
284}
285
286pub trait AsRawPid {
288 fn as_raw_pid(&self) -> Pid;
289}
290
291impl AsRawPid for Pid {
292 fn as_raw_pid(&self) -> Pid {
293 *self
294 }
295}
296
297impl AsRawPid for std::process::Child {
298 fn as_raw_pid(&self) -> Pid {
299 self.id() as Pid
300 }
301}
302
303pub fn wait_for_pid<A: AsRawPid>(pid: A, options: c_int) -> Result<(Option<Pid>, ExitStatus)> {
310 let pid = pid.as_raw_pid();
311 let mut status: c_int = 1;
312 let ret = unsafe { libc::waitpid(pid, &mut status, options) };
315 if ret < 0 {
316 return errno_result();
317 }
318 Ok((
319 if ret == 0 { None } else { Some(ret) },
320 ExitStatus::from_raw(status),
321 ))
322}
323
324pub fn reap_child() -> Result<Pid> {
350 let ret = unsafe { waitpid(-1, ptr::null_mut(), WNOHANG) };
353 if ret == -1 {
354 errno_result()
355 } else {
356 Ok(ret)
357 }
358}
359
360pub fn kill_process_group() -> Result<()> {
365 unsafe { kill(0, SIGKILL) }?;
367 unreachable!();
369}
370
371pub fn pipe() -> Result<(File, File)> {
375 let mut pipe_fds = [-1; 2];
376 let ret = unsafe { pipe2(&mut pipe_fds[0], O_CLOEXEC) };
380 if ret == -1 {
381 errno_result()
382 } else {
383 Ok(unsafe {
387 (
388 File::from_raw_fd(pipe_fds[0]),
389 File::from_raw_fd(pipe_fds[1]),
390 )
391 })
392 }
393}
394
395pub fn set_pipe_size(fd: RawFd, size: usize) -> Result<usize> {
399 syscall!(unsafe { fcntl(fd, libc::F_SETPIPE_SZ, size as c_int) }).map(|ret| ret as usize)
402}
403
404pub fn new_pipe_full() -> Result<(File, File)> {
408 use std::io::Write;
409
410 let (rx, mut tx) = pipe()?;
411 let page_size = set_pipe_size(tx.as_raw_descriptor(), round_up_to_page_size(1))?;
413
414 let buf = vec![0u8; page_size];
416 tx.write_all(&buf)?;
417
418 Ok((rx, tx))
419}
420
421pub struct UnlinkUnixDatagram(pub UnixDatagram);
423impl AsRef<UnixDatagram> for UnlinkUnixDatagram {
424 fn as_ref(&self) -> &UnixDatagram {
425 &self.0
426 }
427}
428impl Drop for UnlinkUnixDatagram {
429 fn drop(&mut self) {
430 if let Ok(addr) = self.0.local_addr() {
431 if let Some(path) = addr.as_pathname() {
432 if let Err(e) = remove_file(path) {
433 warn!("failed to remove control socket file: {}", e);
434 }
435 }
436 }
437 }
438}
439
440pub struct UnlinkUnixListener(pub UnixListener);
442
443impl AsRef<UnixListener> for UnlinkUnixListener {
444 fn as_ref(&self) -> &UnixListener {
445 &self.0
446 }
447}
448
449impl Deref for UnlinkUnixListener {
450 type Target = UnixListener;
451
452 fn deref(&self) -> &UnixListener {
453 &self.0
454 }
455}
456
457impl Drop for UnlinkUnixListener {
458 fn drop(&mut self) {
459 if let Ok(addr) = self.0.local_addr() {
460 if let Some(path) = addr.as_pathname() {
461 if let Err(e) = remove_file(path) {
462 warn!("failed to remove control socket file: {}", e);
463 }
464 }
465 }
466 }
467}
468
469pub fn validate_raw_descriptor(raw_descriptor: RawDescriptor) -> Result<RawDescriptor> {
472 validate_raw_fd(&raw_descriptor)
473}
474
475pub fn validate_raw_fd(raw_fd: &RawFd) -> Result<RawFd> {
478 let flags = unsafe { libc::fcntl(*raw_fd, libc::F_GETFD) };
483 if flags < 0 || (flags & libc::FD_CLOEXEC) != 0 {
484 return Err(Error::new(libc::EBADF));
485 }
486
487 let dup_fd = unsafe { libc::fcntl(*raw_fd, libc::F_DUPFD_CLOEXEC, 0) };
492 if dup_fd < 0 {
493 return Err(Error::last());
494 }
495 Ok(dup_fd as RawFd)
496}
497
498pub fn poll_in<F: AsRawDescriptor>(fd: &F) -> bool {
503 let mut fds = libc::pollfd {
504 fd: fd.as_raw_descriptor(),
505 events: libc::POLLIN,
506 revents: 0,
507 };
508 let ret = unsafe { libc::poll(&mut fds, 1, 0) };
511 if ret == -1 {
515 return false;
516 }
517 fds.revents & libc::POLLIN != 0
518}
519
520pub fn max_timeout() -> Duration {
522 Duration::new(libc::time_t::MAX as u64, 999999999)
523}
524
525pub fn safe_descriptor_from_path<P: AsRef<Path>>(path: P) -> Result<Option<SafeDescriptor>> {
528 let path = path.as_ref();
529 if path.parent() == Some(Path::new("/proc/self/fd")) {
530 let raw_descriptor = path
531 .file_name()
532 .and_then(|fd_osstr| fd_osstr.to_str())
533 .and_then(|fd_str| fd_str.parse::<RawFd>().ok())
534 .ok_or_else(|| Error::new(EINVAL))?;
535 let validated_fd = validate_raw_fd(&raw_descriptor)?;
536 Ok(Some(
537 unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
540 ))
541 } else {
542 Ok(None)
543 }
544}
545
546pub fn safe_descriptor_from_cmdline_fd(fd: &RawFd) -> Result<SafeDescriptor> {
550 let validated_fd = validate_raw_fd(fd)?;
551 Ok(
552 unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
555 )
556}
557
558pub fn open_file_or_duplicate<P: AsRef<Path>>(path: P, options: &OpenOptions) -> Result<File> {
565 let path = path.as_ref();
566 Ok(if let Some(fd) = safe_descriptor_from_path(path)? {
568 fd.into()
569 } else {
570 options.open(path)?
571 })
572}
573
574pub fn max_open_files() -> Result<libc::rlimit64> {
576 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
577
578 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
581 if res == 0 {
582 let limit = unsafe { buf.assume_init() };
585 Ok(limit)
586 } else {
587 errno_result()
588 }
589}
590
591pub fn call_with_extended_max_files<T, E>(
594 callback: impl FnOnce() -> std::result::Result<T, E>,
595) -> Result<std::result::Result<T, E>> {
596 let cur_limit = max_open_files()?;
597 let new_limit = libc::rlimit64 {
598 rlim_cur: cur_limit.rlim_max,
599 ..cur_limit
600 };
601 let needs_extension = cur_limit.rlim_cur < new_limit.rlim_cur;
602 if needs_extension {
603 set_max_open_files(new_limit)?;
604 }
605
606 let r = callback();
607
608 if needs_extension {
610 set_max_open_files(cur_limit)?;
611 }
612
613 Ok(r)
614}
615
616fn set_max_open_files(limit: libc::rlimit64) -> Result<()> {
618 let res = unsafe { libc::setrlimit64(libc::RLIMIT_NOFILE, &limit) };
621 if res == 0 {
622 Ok(())
623 } else {
624 errno_result()
625 }
626}
627
628pub fn move_to_cgroup(cgroup_path: PathBuf, id_to_write: Pid, cgroup_file: &str) -> Result<()> {
630 use std::io::Write;
631
632 let gpu_cgroup_file = cgroup_path.join(cgroup_file);
633 let mut f = File::create(gpu_cgroup_file)?;
634 f.write_all(id_to_write.to_string().as_bytes())?;
635 Ok(())
636}
637
638pub fn move_task_to_cgroup(cgroup_path: PathBuf, thread_id: Pid) -> Result<()> {
639 move_to_cgroup(cgroup_path, thread_id, "tasks")
640}
641
642pub fn move_proc_to_cgroup(cgroup_path: PathBuf, process_id: Pid) -> Result<()> {
643 move_to_cgroup(cgroup_path, process_id, "cgroup.procs")
644}
645
646fn read_sysfs_cpu_info_in_dir(cpu_dir: &str, cpu_id: usize, property: &str) -> Result<String> {
647 let path = Path::new(cpu_dir)
648 .join(format!("cpu{cpu_id}"))
649 .join(property);
650
651 std::fs::read_to_string(path).map_err(|e| e.into())
652}
653
654fn parse_sysfs_cpu_info_vec(cpu_id: usize, property: &str) -> Result<Vec<u32>> {
656 parse_sysfs_cpu_info_vec_in_dir(CPU_DIR, cpu_id, property)
657}
658
659fn parse_sysfs_cpu_info_vec_in_dir(
660 cpu_dir: &str,
661 cpu_id: usize,
662 property: &str,
663) -> Result<Vec<u32>> {
664 read_sysfs_cpu_info_in_dir(cpu_dir, cpu_id, property)?
665 .split_whitespace()
666 .map(|x| x.parse().map_err(|_| Error::new(libc::EINVAL)))
667 .collect()
668}
669
670pub fn logical_core_frequencies_khz(cpu_id: usize) -> Result<Vec<u32>> {
672 parse_sysfs_cpu_info_vec(cpu_id, "cpufreq/scaling_available_frequencies")
673}
674
675fn parse_sysfs_cpu_info(cpu_id: usize, property: &str) -> Result<u32> {
677 parse_sysfs_cpu_info_in_dir(CPU_DIR, cpu_id, property)
678}
679
680fn parse_sysfs_cpu_info_in_dir(cpu_dir: &str, cpu_id: usize, property: &str) -> Result<u32> {
681 read_sysfs_cpu_info_in_dir(cpu_dir, cpu_id, property)?
682 .trim()
683 .parse()
684 .map_err(|_| Error::new(libc::EINVAL))
685}
686
687pub fn logical_core_capacity(cpu_id: usize) -> Result<u32> {
689 static CPU_MAX_FREQS: OnceLock<Option<Vec<u32>>> = OnceLock::new();
690
691 let cpu_capacity = parse_sysfs_cpu_info(cpu_id, "cpu_capacity")?;
692
693 let cpu_max_freqs = CPU_MAX_FREQS.get_or_init(|| {
697 (0..number_of_logical_cores().ok()?)
698 .map(|cpu_id| logical_core_max_freq_khz(cpu_id).ok())
699 .collect()
700 });
701
702 if let Some(cpu_max_freqs) = cpu_max_freqs {
703 let largest_max_freq = *cpu_max_freqs.iter().max().ok_or(Error::new(EINVAL))?;
704 let cpu_max_freq = *cpu_max_freqs.get(cpu_id).ok_or(Error::new(EINVAL))?;
705 let normalized_cpu_capacity = (u64::from(cpu_capacity) * u64::from(largest_max_freq))
706 .checked_div(u64::from(cpu_max_freq))
707 .ok_or(Error::new(EINVAL))?;
708 normalized_cpu_capacity
709 .try_into()
710 .map_err(|_| Error::new(EINVAL))
711 } else {
712 Ok(cpu_capacity)
714 }
715}
716
717pub fn logical_core_cluster_id(cpu_id: usize) -> Result<u32> {
719 parse_sysfs_cpu_info(cpu_id, "topology/physical_package_id")
720}
721
722pub fn logical_core_max_freq_khz(cpu_id: usize) -> Result<u32> {
724 parse_sysfs_cpu_info(cpu_id, "cpufreq/cpuinfo_max_freq")
725}
726
727pub fn is_cpu_online(cpu_id: usize) -> Result<bool> {
730 let result = parse_sysfs_cpu_info(cpu_id, "online");
731 match result {
732 Err(e) => {
733 if e.errno() == libc::ENOENT {
734 Ok(true)
738 } else {
739 Err(e)
740 }
741 }
742 Ok(online) => Ok(online == 1),
743 }
744}
745
/// Scheduling attributes passed by pointer to the `sched_setattr` system call.
///
/// The layout is `#[repr(C)]` so that it can be handed to the kernel as-is; see
/// sched_setattr(2) for the authoritative meaning of each field.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct sched_attr {
    /// Size of this structure in bytes.
    pub size: u32,

    /// Scheduling policy.
    pub sched_policy: u32,
    /// Scheduling flags.
    pub sched_flags: u64,
    /// Nice value.
    pub sched_nice: i32,

    /// Static priority.
    pub sched_priority: u32,

    /// Runtime parameter.
    pub sched_runtime: u64,
    /// Deadline parameter.
    pub sched_deadline: u64,
    /// Period parameter.
    pub sched_period: u64,

    /// Minimum utilization hint.
    pub sched_util_min: u32,
    /// Maximum utilization hint.
    pub sched_util_max: u32,
}
763
764impl Default for sched_attr {
765 fn default() -> Self {
766 Self {
767 size: std::mem::size_of::<sched_attr>() as u32,
768 sched_policy: 0,
769 sched_flags: 0,
770 sched_nice: 0,
771 sched_priority: 0,
772 sched_runtime: 0,
773 sched_deadline: 0,
774 sched_period: 0,
775 sched_util_min: 0,
776 sched_util_max: 0,
777 }
778 }
779}
780
781pub fn sched_setattr(pid: Pid, attr: &mut sched_attr, flags: u32) -> Result<()> {
782 let ret = unsafe {
784 libc::syscall(
785 libc::SYS_sched_setattr,
786 pid as usize,
787 attr as *mut sched_attr as usize,
788 flags as usize,
789 )
790 };
791
792 if ret < 0 {
793 return Err(Error::last());
794 }
795 Ok(())
796}
797
#[cfg(test)]
mod tests {
    use std::fs::create_dir_all;
    use std::fs::File;
    use std::io::Write;
    use std::os::fd::AsRawFd;

    use tempfile::TempDir;

    use super::*;
    use crate::unix::add_fd_flags;

    /// Scalar property exercised by the single-value parsing tests.
    const MAX_FREQ_PROPERTY: &str = "cpufreq/cpuinfo_max_freq";
    /// List property exercised by the vector parsing tests.
    const AVAILABLE_FREQS_PROPERTY: &str = "cpufreq/scaling_available_frequencies";

    /// Location of the fake sysfs CPU directory inside `temp_dir`.
    fn fake_cpu_dir(temp_dir: &TempDir) -> PathBuf {
        temp_dir.path().join("sys/devices/system/cpu")
    }

    /// Writes `content` to `path`, creating any missing parent directories first.
    fn create_temp_file(path: &Path, content: &str) {
        if let Some(dir) = path.parent() {
            create_dir_all(dir).unwrap();
        }
        File::create(path)
            .unwrap()
            .write_all(content.as_bytes())
            .unwrap();
    }

    #[test]
    fn pipe_size_and_fill() {
        let (_rx, mut tx) = new_pipe_full().expect("Failed to pipe");

        // Without O_NONBLOCK the extra write below would block instead of failing.
        add_fd_flags(tx.as_raw_fd(), libc::O_NONBLOCK).expect("Failed to set tx non blocking");
        let extra_write = tx.write(&[0u8; 8]);
        extra_write.expect_err("Write after fill didn't fail");
    }

    #[test]
    fn test_parse_sysfs_cpu_info() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = fake_cpu_dir(&temp_dir);
        create_temp_file(&cpu_dir.join("cpu0").join(MAX_FREQ_PROPERTY), "1000");

        let parsed = parse_sysfs_cpu_info_in_dir(cpu_dir.to_str().unwrap(), 0, MAX_FREQ_PROPERTY);
        assert_eq!(parsed.unwrap(), 1000);
    }

    #[test]
    fn test_parse_sysfs_cpu_info_error() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = fake_cpu_dir(&temp_dir);

        // The property file was never created, so the read must report ENOENT.
        let parsed = parse_sysfs_cpu_info_in_dir(cpu_dir.to_str().unwrap(), 0, MAX_FREQ_PROPERTY);
        assert_eq!(parsed.unwrap_err(), Error::new(libc::ENOENT));
    }

    #[test]
    fn test_parse_sysfs_cpu_info_vec() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = fake_cpu_dir(&temp_dir);
        create_temp_file(
            &cpu_dir.join("cpu0").join(AVAILABLE_FREQS_PROPERTY),
            "1000 2000",
        );

        let parsed = parse_sysfs_cpu_info_vec_in_dir(
            cpu_dir.to_str().unwrap(),
            0,
            AVAILABLE_FREQS_PROPERTY,
        );
        assert_eq!(parsed.unwrap(), vec![1000, 2000]);
    }

    #[test]
    fn test_parse_sysfs_cpu_info_vec_error() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = fake_cpu_dir(&temp_dir);

        // The property file was never created, so the read must report ENOENT.
        let parsed = parse_sysfs_cpu_info_vec_in_dir(
            cpu_dir.to_str().unwrap(),
            0,
            AVAILABLE_FREQS_PROPERTY,
        );
        assert_eq!(parsed.unwrap_err(), Error::new(libc::ENOENT));
    }
}