• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Implement syscalls using the vDSO.
2 //!
3 //! <https://man7.org/linux/man-pages/man7/vdso.7.html>
4 //!
5 //! # Safety
6 //!
7 //! Similar to syscalls.rs, this file performs raw system calls, and sometimes
8 //! passes them uninitialized memory buffers. This file also calls vDSO
9 //! functions.
10 #![allow(unsafe_code)]
11 
12 use super::conv::{c_int, ret};
13 #[cfg(target_arch = "x86")]
14 use super::reg::{ArgReg, RetReg, SyscallNumber, A0, A1, A2, A3, A4, A5, R0};
15 use super::time::types::{ClockId, DynamicClockId, Timespec};
16 use super::{c, vdso};
17 use crate::io;
18 #[cfg(all(asm, target_arch = "x86"))]
19 use core::arch::asm;
20 use core::mem::{transmute, MaybeUninit};
21 use core::ptr::null_mut;
22 use core::sync::atomic::AtomicPtr;
23 use core::sync::atomic::Ordering::Relaxed;
24 #[cfg(target_pointer_width = "32")]
25 use linux_raw_sys::general::timespec as __kernel_old_timespec;
26 use linux_raw_sys::general::{__kernel_clockid_t, __kernel_timespec};
27 
28 #[inline]
clock_gettime(which_clock: ClockId) -> __kernel_timespec29 pub(crate) fn clock_gettime(which_clock: ClockId) -> __kernel_timespec {
30     // Safety: `CLOCK_GETTIME` contains either null or the address of a
31     // function with an ABI like libc `clock_gettime`, and calling it has
32     // the side effect of writing to the result buffer, and no others.
33     unsafe {
34         let mut result = MaybeUninit::<__kernel_timespec>::uninit();
35         let callee = match transmute(CLOCK_GETTIME.load(Relaxed)) {
36             Some(callee) => callee,
37             None => init_clock_gettime(),
38         };
39         let r0 = callee(which_clock as c::c_int, result.as_mut_ptr());
40         assert_eq!(r0, 0);
41         result.assume_init()
42     }
43 }
44 
45 #[inline]
clock_gettime_dynamic(which_clock: DynamicClockId<'_>) -> io::Result<Timespec>46 pub(crate) fn clock_gettime_dynamic(which_clock: DynamicClockId<'_>) -> io::Result<Timespec> {
47     let id = match which_clock {
48         DynamicClockId::Known(id) => id as __kernel_clockid_t,
49 
50         DynamicClockId::Dynamic(fd) => {
51             // See `FD_TO_CLOCKID` in Linux's `clock_gettime` documentation.
52             use crate::backend::fd::AsRawFd;
53             const CLOCKFD: i32 = 3;
54             ((!fd.as_raw_fd() << 3) | CLOCKFD) as __kernel_clockid_t
55         }
56 
57         DynamicClockId::RealtimeAlarm => {
58             linux_raw_sys::general::CLOCK_REALTIME_ALARM as __kernel_clockid_t
59         }
60         DynamicClockId::Tai => linux_raw_sys::general::CLOCK_TAI as __kernel_clockid_t,
61         DynamicClockId::Boottime => linux_raw_sys::general::CLOCK_BOOTTIME as __kernel_clockid_t,
62         DynamicClockId::BoottimeAlarm => {
63             linux_raw_sys::general::CLOCK_BOOTTIME_ALARM as __kernel_clockid_t
64         }
65     };
66 
67     // Safety: `CLOCK_GETTIME` contains either null or the address of a
68     // function with an ABI like libc `clock_gettime`, and calling it has
69     // the side effect of writing to the result buffer, and no others.
70     unsafe {
71         const EINVAL: c::c_int = -(c::EINVAL as c::c_int);
72         let mut timespec = MaybeUninit::<Timespec>::uninit();
73         let callee = match transmute(CLOCK_GETTIME.load(Relaxed)) {
74             Some(callee) => callee,
75             None => init_clock_gettime(),
76         };
77         match callee(id, timespec.as_mut_ptr()) {
78             0 => (),
79             EINVAL => return Err(io::Errno::INVAL),
80             _ => _rustix_clock_gettime_via_syscall(id, timespec.as_mut_ptr())?,
81         }
82         Ok(timespec.assume_init())
83     }
84 }
85 
/// x86 system-call wrappers that make an indirect call through the entry
/// point cached in `SYSCALL`, rather than issuing the trap inline.
///
/// Every wrapper fetches the entry point via `entry_point` and hands it,
/// together with the syscall number and arguments, to the matching
/// `asm::indirect_syscall*` routine.
#[cfg(target_arch = "x86")]
pub(super) mod x86_via_vdso {
    use super::{transmute, ArgReg, Relaxed, RetReg, SyscallNumber, A0, A1, A2, A3, A4, A5, R0};
    use crate::backend::arch::asm;

    /// Return the cached syscall entry point, running `init_syscall` first
    /// if the `SYSCALL` slot is still null.
    #[inline]
    fn entry_point() -> super::SyscallType {
        // Safety: `SYSCALL` contains either null or the address of a syscall
        // entry point. Null transmutes to `None`, every other value to
        // `Some` of that function pointer.
        unsafe {
            let cached: Option<super::SyscallType> = transmute(super::SYSCALL.load(Relaxed));
            match cached {
                Some(callee) => callee,
                None => super::init_syscall(),
            }
        }
    }

    /// Make system call `nr` with no arguments.
    ///
    /// # Safety
    ///
    /// Delegates to `asm::indirect_syscall0`; presumably the caller must
    /// uphold the contract of the underlying system call (TODO confirm
    /// against `asm`).
    #[inline]
    pub(in crate::backend) unsafe fn syscall0(nr: SyscallNumber<'_>) -> RetReg<R0> {
        asm::indirect_syscall0(entry_point(), nr)
    }

    /// Make system call `nr` with one argument.
    ///
    /// # Safety
    ///
    /// Same requirements as `syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall1<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
    ) -> RetReg<R0> {
        asm::indirect_syscall1(entry_point(), nr, a0)
    }

    /// Make system call `nr` with one argument; this wrapper never returns.
    ///
    /// # Safety
    ///
    /// Same requirements as `syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall1_noreturn<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
    ) -> ! {
        asm::indirect_syscall1_noreturn(entry_point(), nr, a0)
    }

    /// Make system call `nr` with two arguments.
    ///
    /// # Safety
    ///
    /// Same requirements as `syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall2<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
    ) -> RetReg<R0> {
        asm::indirect_syscall2(entry_point(), nr, a0, a1)
    }

    /// Make system call `nr` with three arguments.
    ///
    /// # Safety
    ///
    /// Same requirements as `syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall3<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
    ) -> RetReg<R0> {
        asm::indirect_syscall3(entry_point(), nr, a0, a1, a2)
    }

    /// Make system call `nr` with four arguments.
    ///
    /// # Safety
    ///
    /// Same requirements as `syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall4<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
        a3: ArgReg<'a, A3>,
    ) -> RetReg<R0> {
        asm::indirect_syscall4(entry_point(), nr, a0, a1, a2, a3)
    }

    /// Make system call `nr` with five arguments.
    ///
    /// # Safety
    ///
    /// Same requirements as `syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall5<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
        a3: ArgReg<'a, A3>,
        a4: ArgReg<'a, A4>,
    ) -> RetReg<R0> {
        asm::indirect_syscall5(entry_point(), nr, a0, a1, a2, a3, a4)
    }

    /// Make system call `nr` with six arguments.
    ///
    /// # Safety
    ///
    /// Same requirements as `syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall6<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
        a3: ArgReg<'a, A3>,
        a4: ArgReg<'a, A4>,
        a5: ArgReg<'a, A5>,
    ) -> RetReg<R0> {
        asm::indirect_syscall6(entry_point(), nr, a0, a1, a2, a3, a4, a5)
    }

    // With the indirect call, it isn't meaningful to do a separate
    // `_readonly` optimization.
    pub(in crate::backend) use {
        syscall0 as syscall0_readonly, syscall1 as syscall1_readonly,
        syscall2 as syscall2_readonly, syscall3 as syscall3_readonly,
        syscall4 as syscall4_readonly, syscall5 as syscall5_readonly,
        syscall6 as syscall6_readonly,
    };
}
208 
209 type ClockGettimeType = unsafe extern "C" fn(c::c_int, *mut Timespec) -> c::c_int;
210 
211 /// The underlying syscall functions are only called from asm, using the
212 /// special syscall calling convention to pass arguments and return values,
213 /// which the signature here doesn't reflect.
214 #[cfg(target_arch = "x86")]
215 pub(super) type SyscallType = unsafe extern "C" fn();
216 
217 /// Initialize `CLOCK_GETTIME` and return its value.
init_clock_gettime() -> ClockGettimeType218 fn init_clock_gettime() -> ClockGettimeType {
219     init();
220     // Safety: Load the function address from static storage that we
221     // just initialized.
222     unsafe { transmute(CLOCK_GETTIME.load(Relaxed)) }
223 }
224 
/// Run `init` and return the syscall entry point that it left in `SYSCALL`.
#[cfg(target_arch = "x86")]
fn init_syscall() -> SyscallType {
    init();

    // Safety: Load the function address from static storage that we
    // just initialized.
    unsafe {
        let installed = SYSCALL.load(Relaxed);
        transmute::<*mut Function, SyscallType>(installed)
    }
}
233 
/// `AtomicPtr` can't hold a `fn` pointer, so we use a `*` pointer to this
/// placeholder type, and cast it as needed.
struct Function;

/// Address of the `clock_gettime` implementation to call, or null while
/// uninitialized.
///
/// `AtomicPtr` already provides interior mutability, so this is a plain
/// `static` rather than a `static mut`; reading or updating it needs no
/// `unsafe` by itself (only the transmute to a callable type does).
static CLOCK_GETTIME: AtomicPtr<Function> = AtomicPtr::new(null_mut());

/// Address of the x86 syscall entry point to call, or null while
/// uninitialized. A plain `static` for the same reason as `CLOCK_GETTIME`.
#[cfg(target_arch = "x86")]
static SYSCALL: AtomicPtr<Function> = AtomicPtr::new(null_mut());
240 
rustix_clock_gettime_via_syscall( clockid: c::c_int, res: *mut Timespec, ) -> c::c_int241 unsafe extern "C" fn rustix_clock_gettime_via_syscall(
242     clockid: c::c_int,
243     res: *mut Timespec,
244 ) -> c::c_int {
245     match _rustix_clock_gettime_via_syscall(clockid, res) {
246         Ok(()) => 0,
247         Err(err) => err.raw_os_error().wrapping_neg(),
248     }
249 }
250 
/// 32-bit implementation: issue `clock_gettime64`, and if the kernel reports
/// `NOSYS`, retry through `_rustix_clock_gettime_via_syscall_old`.
///
/// Any other outcome (success or a different error) is returned unchanged.
#[cfg(target_pointer_width = "32")]
unsafe fn _rustix_clock_gettime_via_syscall(
    clockid: c::c_int,
    res: *mut Timespec,
) -> io::Result<()> {
    let raw = syscall!(__NR_clock_gettime64, c_int(clockid), res);
    let outcome = ret(raw);
    if let Err(io::Errno::NOSYS) = outcome {
        return _rustix_clock_gettime_via_syscall_old(clockid, res);
    }
    outcome
}
262 
/// 32-bit fallback: issue the original `clock_gettime` system call into a
/// `__kernel_old_timespec`, then widen the fields into `*res`.
///
/// On error nothing is written to `res` and the error is returned.
#[cfg(target_pointer_width = "32")]
unsafe fn _rustix_clock_gettime_via_syscall_old(
    clockid: c::c_int,
    res: *mut Timespec,
) -> io::Result<()> {
    // Ordinarily `rustix` doesn't like to emulate system calls, but in
    // the case of time APIs, it's specific to Linux, specific to
    // 32-bit architectures *and* specific to old kernel versions, and
    // it's not that hard to fix up here, so that no other code needs
    // to worry about this.
    let mut legacy = MaybeUninit::<__kernel_old_timespec>::uninit();
    let raw = syscall!(__NR_clock_gettime, c_int(clockid), &mut legacy);
    ret(raw)?;

    // The call succeeded, so the kernel filled in the old-layout buffer.
    let legacy = legacy.assume_init();
    res.write(Timespec {
        tv_sec: legacy.tv_sec.into(),
        tv_nsec: legacy.tv_nsec.into(),
    });
    Ok(())
}
287 
288 #[cfg(target_pointer_width = "64")]
_rustix_clock_gettime_via_syscall( clockid: c::c_int, res: *mut Timespec, ) -> io::Result<()>289 unsafe fn _rustix_clock_gettime_via_syscall(
290     clockid: c::c_int,
291     res: *mut Timespec,
292 ) -> io::Result<()> {
293     ret(syscall!(__NR_clock_gettime, c_int(clockid), res))
294 }
295 
/// A symbol pointing to an `int 0x80` instruction. This "function" is only
/// called from assembly, and only with the x86 syscall calling convention,
/// so its signature here is not its true signature.
///
/// This inline-asm definition is compiled only on x86 when the `asm` cfg is
/// set. `minimal_init` stores its address in `SYSCALL` as the default entry
/// point.
#[cfg(all(asm, target_arch = "x86"))]
#[naked]
unsafe extern "C" fn rustix_int_0x80() {
    // The whole body is one `asm!` block: the trap instruction followed by an
    // explicit `ret`. `options(noreturn)` tells the compiler that control
    // never falls out of the end of the block.
    asm!("int $$0x80", "ret", options(noreturn))
}
304 
// The outline version of `rustix_int_0x80`: on x86 builds without the `asm`
// cfg, the symbol is only declared here and must be provided externally at
// link time. As with the inline-asm definition, the declared signature is a
// placeholder; the symbol is only meant to be reached from assembly.
#[cfg(all(not(asm), target_arch = "x86"))]
extern "C" {
    fn rustix_int_0x80();
}
310 
minimal_init()311 fn minimal_init() {
312     // Safety: Store default function addresses in static storage so that if we
313     // end up making any system calls while we read the vDSO, they'll work.
314     // If the memory happens to already be initialized, this is redundant, but
315     // not harmful.
316     unsafe {
317         CLOCK_GETTIME
318             .compare_exchange(
319                 null_mut(),
320                 rustix_clock_gettime_via_syscall as *mut Function,
321                 Relaxed,
322                 Relaxed,
323             )
324             .ok();
325         #[cfg(target_arch = "x86")]
326         {
327             SYSCALL
328                 .compare_exchange(
329                     null_mut(),
330                     rustix_int_0x80 as *mut Function,
331                     Relaxed,
332                     Relaxed,
333                 )
334                 .ok();
335         }
336     }
337 }
338 
init()339 fn init() {
340     minimal_init();
341 
342     if let Some(vdso) = vdso::Vdso::new() {
343         // Look up the platform-specific `clock_gettime` symbol as documented
344         // [here], except on 32-bit platforms where we look up the
345         // `64`-suffixed variant and fail if we don't find it.
346         //
347         // [here]: https://man7.org/linux/man-pages/man7/vdso.7.html
348         #[cfg(target_arch = "x86_64")]
349         let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
350         #[cfg(target_arch = "arm")]
351         let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
352         #[cfg(target_arch = "aarch64")]
353         let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
354         #[cfg(target_arch = "x86")]
355         let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
356         #[cfg(target_arch = "riscv64")]
357         let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
358         #[cfg(target_arch = "powerpc64")]
359         let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
360         #[cfg(target_arch = "mips")]
361         let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
362         #[cfg(target_arch = "mips64")]
363         let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
364 
365         // On all 64-bit platforms, the 64-bit `clock_gettime` symbols are
366         // always available.
367         #[cfg(target_pointer_width = "64")]
368         let ok = true;
369 
370         // On some 32-bit platforms, the 64-bit `clock_gettime` symbols are not
371         // available on older kernel versions.
372         #[cfg(any(target_arch = "arm", target_arch = "mips", target_arch = "x86"))]
373         let ok = !ptr.is_null();
374 
375         if ok {
376             assert!(!ptr.is_null());
377 
378             // Safety: Store the computed function addresses in static storage
379             // so that we don't need to compute it again (but if we do, it doesn't
380             // hurt anything).
381             unsafe {
382                 CLOCK_GETTIME.store(ptr.cast(), Relaxed);
383             }
384         }
385 
386         // On x86, also look up the vsyscall entry point.
387         #[cfg(target_arch = "x86")]
388         {
389             let ptr = vdso.sym(cstr!("LINUX_2.5"), cstr!("__kernel_vsyscall"));
390             assert!(!ptr.is_null());
391 
392             // Safety: As above, store the computed function addresses in
393             // static storage.
394             unsafe {
395                 SYSCALL.store(ptr.cast(), Relaxed);
396             }
397         }
398     }
399 }
400