• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/__support/threads/thread.h"
10 #include "config/linux/app.h"
11 #include "src/__support/CPP/atomic.h"
12 #include "src/__support/CPP/string_view.h"
13 #include "src/__support/CPP/stringstream.h"
14 #include "src/__support/OSUtil/syscall.h" // For syscall functions.
15 #include "src/__support/common.h"
16 #include "src/__support/error_or.h"
17 #include "src/__support/threads/linux/futex_utils.h" // For FutexWordType
18 #include "src/errno/libc_errno.h"                    // For error macros
19 
20 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
21 #include <arm_acle.h>
22 #endif
23 
24 #include <fcntl.h>
25 #include <linux/param.h> // For EXEC_PAGESIZE.
26 #include <linux/prctl.h> // For PR_SET_NAME
27 #include <linux/sched.h> // For CLONE_* flags.
28 #include <stdint.h>
29 #include <sys/mman.h>    // For PROT_* and MAP_* definitions.
30 #include <sys/syscall.h> // For syscall numbers.
31 
32 namespace LIBC_NAMESPACE {
33 
// Select whichever mmap syscall number the kernel headers provide for this
// target; at least one of SYS_mmap2 / SYS_mmap must be available.
#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif
41 
static constexpr size_t NAME_SIZE_MAX = 16; // Includes the null terminator

// Sentinel written into a new thread's clear-tid futex word. The kernel
// resets the word to 0 (via CLONE_CHILD_CLEARTID) when the thread exits,
// which is the condition Thread::wait polls for.
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;

static constexpr unsigned CLONE_SYSCALL_FLAGS =
    CLONE_VM        // Share the memory space with the parent.
    | CLONE_FS      // Share the file system with the parent.
    | CLONE_FILES   // Share the files with the parent.
    | CLONE_SIGHAND // Share the signal handlers with the parent.
    | CLONE_THREAD  // Same thread group as the parent.
    | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
                    // values
    | CLONE_PARENT_SETTID  // Set child thread ID in |ptid| of the parent.
    | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address and
                           // wake the joining thread.
    | CLONE_SETTLS;        // Setup the thread pointer of the new thread.
56 
// Name of the register the clone syscall's result is pinned to on each
// architecture. The result must live in a register: the child thread resumes
// on a brand-new stack where this function's stack locals do not exist
// (see the note above the clone call in Thread::run).
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#define CLONE_RESULT_REGISTER "x0"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#define CLONE_RESULT_REGISTER "t0"
#elif defined(LIBC_TARGET_ARCH_IS_X86_64)
#define CLONE_RESULT_REGISTER "rax"
#else
#error "CLONE_RESULT_REGISTER not defined for your target architecture"
#endif
66 
add_no_overflow(size_t lhs,size_t rhs)67 static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
68   if (lhs > SIZE_MAX - rhs)
69     return Error{EINVAL};
70   if (rhs > SIZE_MAX - lhs)
71     return Error{EINVAL};
72   return lhs + rhs;
73 }
74 
round_to_page(size_t v)75 static constexpr ErrorOr<size_t> round_to_page(size_t v) {
76   auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
77   if (!vp_or_err)
78     return vp_or_err;
79 
80   return vp_or_err.value() & -EXEC_PAGESIZE;
81 }
82 
// Allocates a thread stack of |stacksize| bytes, preceded by a |guardsize|
// byte PROT_NONE guard region, using mmap. Returns a pointer to the usable
// stack area (just past the guard), or a positive errno wrapped in Error.
LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {

  // Guard needs to be mapped with PROT_NONE; when there is no guard, the
  // whole mapping can be read/write from the start.
  int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
  auto size_or_err = add_no_overflow(stacksize, guardsize);
  if (!size_or_err)
    return Error{int(size_or_err.error())};
  size_t size = size_or_err.value();

  // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
  // future-proof.
  long mmap_result = LIBC_NAMESPACE::syscall_impl<long>(
      MMAP_SYSCALL_NUMBER,
      0, // No special address
      size, prot,
      MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
      -1,                          // Not backed by any file
      0                            // No offset
  );
  // NOTE(review): a failed raw mmap returns a negated errno value; the second
  // clause appears intended to distinguish such small-magnitude negatives
  // from a legitimate mapping at a very high address — confirm against the
  // kernel's syscall error-range convention.
  if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
    return Error{int(-mmap_result)};

  if (guardsize) {
    // Give read/write permissions to actual stack.
    // TODO: We are assuming stack growsdown here.
    long result = LIBC_NAMESPACE::syscall_impl<long>(
        SYS_mprotect, mmap_result + guardsize, stacksize,
        PROT_READ | PROT_WRITE);

    if (result != 0)
      return Error{int(-result)};
  }
  // Step past the guard so callers see only the usable stack memory.
  mmap_result += guardsize;
  return reinterpret_cast<void *>(mmap_result);
}
118 
119 // This must always be inlined as we may be freeing the calling threads stack in
120 // which case a normal return from the top the stack would cause an invalid
121 // memory read.
122 [[gnu::always_inline]] LIBC_INLINE void
free_stack(void * stack,size_t stacksize,size_t guardsize)123 free_stack(void *stack, size_t stacksize, size_t guardsize) {
124   uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
125   stackaddr -= guardsize;
126   stack = reinterpret_cast<void *>(stackaddr);
127   LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, stack, stacksize + guardsize);
128 }
129 
130 struct Thread;
131 
// We align the start args to 16-byte boundary as we adjust the allocated
// stack memory with its size. We want the adjusted address to be at a
// 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
// If different architecture in future requires higher alignment, then we
// can add a platform specific alignment spec.
struct alignas(STACK_ALIGNMENT) StartArgs {
  ThreadAttributes *thread_attrib; // Thread state block, also packed on the
                                   // new thread's stack (see Thread::run).
  ThreadRunner runner;             // User entry point (POSIX or stdc flavor).
  void *arg;                       // Argument forwarded to the runner.
};
142 
// Releases a thread's TLS block and, if the stack was allocated by
// Thread::run rather than supplied by the caller, its stack as well.
//
// This must always be inlined as we may be freeing the calling thread's own
// stack, in which case a normal return from the top of the stack would cause
// an invalid memory read.
[[gnu::always_inline]] LIBC_INLINE void
cleanup_thread_resources(ThreadAttributes *attrib) {
  // Cleanup the TLS before the stack as the TLS information is stored on
  // the stack.
  cleanup_tls(attrib->tls, attrib->tls_size);
  if (attrib->owned_stack)
    free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
}
154 
// Returns the address on the new thread's stack where Thread::run packed the
// StartArgs. The computation is architecture specific because it depends on
// what the frame pointer refers to on entry to start_thread.
[[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
// NOTE: For __builtin_frame_address to work reliably across compilers,
// architectures and various optimization levels, the TU including this file
// should be compiled with -fno-omit-frame-pointer.
#ifdef LIBC_TARGET_ARCH_IS_X86_64
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
         // The x86_64 call instruction pushes resume address on to the stack.
         // Next, The x86_64 SysV ABI requires that the frame pointer be pushed
         // on to the stack. So, we have to step past two 64-bit values to get
         // to the start args.
         + sizeof(uintptr_t) * 2;
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
  // The frame pointer after cloning the new thread in the Thread::run method
  // is set to the stack pointer where start args are stored. So, we fetch
  // from there.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  // The current frame pointer is the previous stack pointer where the start
  // args are stored.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
#endif
}
177 
// Entry point of every new thread, running on the freshly cloned stack.
// Recovers the StartArgs packed by Thread::run, publishes the thread's
// attributes via the thread-local |self|, invokes the user runner matching
// the thread's style, and exits the thread with the runner's return value.
[[gnu::noinline]] void start_thread() {
  auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
  auto *attrib = start_args->thread_attrib;
  self.attrib = attrib;
  self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();

  if (attrib->style == ThreadStyle::POSIX) {
    attrib->retval.posix_retval =
        start_args->runner.posix_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
                ThreadStyle::POSIX);
  } else {
    attrib->retval.stdc_retval =
        start_args->runner.stdc_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
                ThreadStyle::STDC);
  }
}
196 
// Creates and starts a new thread via the clone syscall.
// - style: selects the POSIX or stdc runner/return-value convention.
// - runner/arg: the user entry point and its argument.
// - stack: caller-provided stack, or nullptr to allocate one here (in which
//   case stacksize 0 selects DEFAULT_STACKSIZE and both sizes are rounded
//   up to page boundaries).
// - guardsize: size of the PROT_NONE guard region below the stack.
// - detached: initial detach state of the new thread.
// Returns 0 on success, or a positive errno value on failure (resources
// allocated along the way are released before returning an error).
int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
                size_t stacksize, size_t guardsize, bool detached) {
  bool owned_stack = false;
  if (stack == nullptr) {
    // TODO: Should we return EINVAL here? Should we have a generic concept of a
    //       minimum stacksize (like 16384 for pthread).
    if (stacksize == 0)
      stacksize = DEFAULT_STACKSIZE;
    // Roundup stacksize/guardsize to page size.
    // TODO: Should be also add sizeof(ThreadAttribute) and other internal
    //       meta data?
    auto round_or_err = round_to_page(guardsize);
    if (!round_or_err)
      return round_or_err.error();
    guardsize = round_or_err.value();

    round_or_err = round_to_page(stacksize);
    if (!round_or_err)
      return round_or_err.error();

    stacksize = round_or_err.value();
    auto alloc = alloc_stack(stacksize, guardsize);
    if (!alloc)
      return alloc.error();
    else
      stack = alloc.value();
    owned_stack = true;
  }

  // Validate that stack/stacksize are validly aligned.
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  if ((stackaddr % STACK_ALIGNMENT != 0) ||
      ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return EINVAL;
  }

  TLSDescriptor tls;
  init_tls(tls);

  // When the new thread is spawned by the kernel, the new thread gets the
  // stack we pass to the clone syscall. However, this stack is empty and does
  // not have any local vars present in this function. Hence, one cannot
  // pass arguments to the thread start function, or use any local vars from
  // here. So, we pack them into the new stack from where the thread can sniff
  // them out.
  //
  // Likewise, the actual thread state information is also stored on the
  // stack memory.

  static constexpr size_t INTERNAL_STACK_DATA_SIZE =
      sizeof(StartArgs) + sizeof(ThreadAttributes) + sizeof(Futex);

  // This is pretty arbitrary, but at the moment we don't adjust user provided
  // stacksize (or default) to account for this data as its assumed minimal. If
  // this assert starts failing we probably should. Likewise if we can't bound
  // this we may overflow when we subtract it from the top of the stack.
  static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);

  // TODO: We are assuming stack growsdown here.
  auto adjusted_stack_or_err =
      add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
  if (!adjusted_stack_or_err) {
    cleanup_tls(tls.addr, tls.size);
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return adjusted_stack_or_err.error();
  }

  // Carve out the internal data from the top of the stack, then re-align
  // downward so the packed data sits on a STACK_ALIGNMENT boundary.
  uintptr_t adjusted_stack =
      adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
  adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);

  auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);

  // Layout at the top of the new stack: StartArgs, then ThreadAttributes,
  // then the clear-tid Futex word.
  attrib =
      reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
  attrib->style = style;
  attrib->detach_state =
      uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
  attrib->stack = stack;
  attrib->stacksize = stacksize;
  attrib->guardsize = guardsize;
  attrib->owned_stack = owned_stack;
  attrib->tls = tls.addr;
  attrib->tls_size = tls.size;

  start_args->thread_attrib = attrib;
  start_args->runner = runner;
  start_args->arg = arg;

  auto clear_tid = reinterpret_cast<Futex *>(
      adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
  clear_tid->set(CLEAR_TID_VALUE);
  attrib->platform_data = clear_tid;

  // The clone syscall takes arguments in an architecture specific order.
  // Also, we want the result of the syscall to be in a register as the child
  // thread gets a completely different stack after it is created. The stack
  // variables from this function will not be available to the child thread.
#if defined(LIBC_TARGET_ARCH_IS_X86_64)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,    // The address where the child tid is written
      &clear_tid->val, // The futex where the child thread status is signalled
      tls.tp           // The thread pointer value for the new thread.
  );
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                  \
    defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,   // The address where the child tid is written
      tls.tp,         // The thread pointer value for the new thread.
      &clear_tid->val // The futex where the child thread status is signalled
  );
#else
#error "Unsupported architecture for the clone syscall."
#endif

  if (clone_result == 0) {
    // We are in the child thread here.
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
    // We set the frame pointer to be the same as the "sp" so that start args
    // can be sniffed out from start_thread.
#ifdef __clang__
    // GCC does not currently implement __arm_wsr64/__arm_rsr64.
    __arm_wsr64("x29", __arm_rsr64("sp"));
#else
    asm volatile("mov x29, sp");
#endif
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
    asm volatile("mv fp, sp");
#endif
    start_thread();
  } else if (clone_result < 0) {
    cleanup_thread_resources(attrib);
    return static_cast<int>(-clone_result);
  }

  return 0;
}
340 
join(ThreadReturnValue & retval)341 int Thread::join(ThreadReturnValue &retval) {
342   wait();
343 
344   if (attrib->style == ThreadStyle::POSIX)
345     retval.posix_retval = attrib->retval.posix_retval;
346   else
347     retval.stdc_retval = attrib->retval.stdc_retval;
348 
349   cleanup_thread_resources(attrib);
350 
351   return 0;
352 }
353 
detach()354 int Thread::detach() {
355   uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
356   if (attrib->detach_state.compare_exchange_strong(
357           joinable_state, uint32_t(DetachState::DETACHED))) {
358     return int(DetachType::SIMPLE);
359   }
360 
361   // If the thread was already detached, then the detach method should not
362   // be called at all. If the thread is exiting, then we wait for it to exit
363   // and free up resources.
364   wait();
365 
366   cleanup_thread_resources(attrib);
367 
368   return int(DetachType::CLEANUP);
369 }
370 
// Blocks until the thread terminates. The thread was created with
// CLONE_CHILD_CLEARTID, so the kernel zeroes the clear-tid futex word and
// wakes waiters when the thread exits.
void Thread::wait() {
  // The kernel should set the value at the clear tid address to zero.
  // If not, it is a spurious wake and we should continue to wait on
  // the futex.
  auto *clear_tid = reinterpret_cast<Futex *>(attrib->platform_data);
  // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
  // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
  while (clear_tid->load() != 0)
    clear_tid->wait(CLEAR_TID_VALUE, cpp::nullopt, true);
}
381 
operator ==(const Thread & thread) const382 bool Thread::operator==(const Thread &thread) const {
383   return attrib->tid == thread.attrib->tid;
384 }
385 
// Pieces for building "/proc/self/task/<tid>/comm" paths, used by
// set_name/get_name when operating on a thread other than the caller.
static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
static constexpr size_t THREAD_NAME_PATH_SIZE =
    THREAD_NAME_PATH_PREFIX.size() +
    IntegerToString<int>::buffer_size() + // Size of tid
    1 +                                   // For '/' character
    5; // For the file name "comm" and the nullterminator.
392 
construct_thread_name_file_path(cpp::StringStream & stream,int tid)393 static void construct_thread_name_file_path(cpp::StringStream &stream,
394                                             int tid) {
395   stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
396          << cpp::StringStream::ENDS;
397 }
398 
// Sets this thread's name. For the current thread PR_SET_NAME is used
// directly; for any other thread the name is written to its
// /proc/self/task/<tid>/comm file.
// Returns 0 on success, ERANGE if |name| (plus terminator) exceeds
// NAME_SIZE_MAX, EIO on a short write, or a positive errno value.
int Thread::set_name(const cpp::string_view &name) {
  if (name.size() >= NAME_SIZE_MAX)
    return ERANGE;

  if (*this == self) {
    // If we are setting the name of the current thread, then we can
    // use the syscall to set the name.
    int retval =
        LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_SET_NAME, name.data());
    if (retval < 0)
      return -retval;
    else
      return 0;
  }

  char path_name_buffer[THREAD_NAME_PATH_SIZE];
  cpp::StringStream path_stream(path_name_buffer);
  construct_thread_name_file_path(path_stream, attrib->tid);
// Some targets (e.g. riscv32) only provide SYS_openat, not SYS_open.
#ifdef SYS_open
  int fd =
      LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDWR);
#else
  int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
                                             path_name_buffer, O_RDWR);
#endif
  if (fd < 0)
    return -fd;

  int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_write, fd, name.data(),
                                                 name.size());
  LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);

  if (retval < 0)
    return -retval;
  else if (retval != int(name.size()))
    return EIO; // Short write: the kernel did not accept the whole name.
  else
    return 0;
}
438 
get_name(cpp::StringStream & name) const439 int Thread::get_name(cpp::StringStream &name) const {
440   if (name.bufsize() < NAME_SIZE_MAX)
441     return ERANGE;
442 
443   char name_buffer[NAME_SIZE_MAX];
444 
445   if (*this == self) {
446     // If we are getting the name of the current thread, then we can
447     // use the syscall to get the name.
448     int retval =
449         LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_GET_NAME, name_buffer);
450     if (retval < 0)
451       return -retval;
452     name << name_buffer << cpp::StringStream::ENDS;
453     return 0;
454   }
455 
456   char path_name_buffer[THREAD_NAME_PATH_SIZE];
457   cpp::StringStream path_stream(path_name_buffer);
458   construct_thread_name_file_path(path_stream, attrib->tid);
459 #ifdef SYS_open
460   int fd =
461       LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDONLY);
462 #else
463   int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
464                                              path_name_buffer, O_RDONLY);
465 #endif
466   if (fd < 0)
467     return -fd;
468 
469   int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_read, fd, name_buffer,
470                                                  NAME_SIZE_MAX);
471   LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
472   if (retval < 0)
473     return -retval;
474   if (retval == NAME_SIZE_MAX)
475     return ERANGE;
476   if (name_buffer[retval - 1] == '\n')
477     name_buffer[retval - 1] = '\0';
478   else
479     name_buffer[retval] = '\0';
480   name << name_buffer << cpp::StringStream::ENDS;
481   return 0;
482 }
483 
// Terminates the calling thread, recording |retval| according to |style|.
// Runs the thread's atexit callbacks first, then either leaves cleanup to a
// future joiner (joinable case) or, for a detached thread, frees its own
// resources and exits without touching its (now freed) stack. Never returns.
void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
  auto attrib = self.attrib;

  // The very first thing we do is to call the thread's atexit callbacks.
  // These callbacks could be the ones registered by the language runtimes,
  // for example, the destructors of thread local objects. They can also
  // be destructors of the TSS objects set using API like pthread_setspecific.
  // NOTE: We cannot call the atexit callbacks as part of the
  // cleanup_thread_resources function as that function can be called from a
  // different thread. The destructors of thread local and TSS objects should
  // be called by the thread which owns them.
  internal::call_atexit_callbacks(attrib);

  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (!attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::EXITING))) {
    // Thread is detached so cleanup the resources.
    cleanup_thread_resources(attrib);

    // Set the CLEAR_TID address to nullptr to prevent the kernel
    // from signalling at a non-existent futex location.
    LIBC_NAMESPACE::syscall_impl<long>(SYS_set_tid_address, 0);
    // Return value for detached thread should be unused. We need to avoid
    // referencing `style` or `retval.*` because they may be stored on the stack
    // and we have deallocated our stack!
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 0);
    __builtin_unreachable();
  }

  // Joinable case: the state is now EXITING; the joiner reads the return
  // value out of the attributes and performs the cleanup.
  if (style == ThreadStyle::POSIX)
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.posix_retval);
  else
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.stdc_retval);
  __builtin_unreachable();
}
519 
520 } // namespace LIBC_NAMESPACE
521