1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "src/__support/threads/thread.h"
10 #include "config/linux/app.h"
11 #include "src/__support/CPP/atomic.h"
12 #include "src/__support/CPP/string_view.h"
13 #include "src/__support/CPP/stringstream.h"
14 #include "src/__support/OSUtil/syscall.h" // For syscall functions.
15 #include "src/__support/common.h"
16 #include "src/__support/error_or.h"
17 #include "src/__support/threads/linux/futex_utils.h" // For FutexWordType
18 #include "src/errno/libc_errno.h" // For error macros
19
20 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
21 #include <arm_acle.h>
22 #endif
23
24 #include <fcntl.h>
25 #include <linux/param.h> // For EXEC_PAGESIZE.
26 #include <linux/prctl.h> // For PR_SET_NAME
27 #include <linux/sched.h> // For CLONE_* flags.
28 #include <stdint.h>
29 #include <sys/mman.h> // For PROT_* and MAP_* definitions.
30 #include <sys/syscall.h> // For syscall numbers.
31
32 namespace LIBC_NAMESPACE {
33
// Prefer SYS_mmap2 where it exists (typically 32-bit targets); the offset
// argument is always 0 below so the unit difference does not matter.
#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif

static constexpr size_t NAME_SIZE_MAX = 16; // Includes the null terminator

// Sentinel stored in the clear-tid futex word. The kernel resets the word to
// zero when the thread exits (via CLONE_CHILD_CLEARTID), which is what
// Thread::wait polls for.
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;

static constexpr unsigned CLONE_SYSCALL_FLAGS =
    CLONE_VM        // Share the memory space with the parent.
    | CLONE_FS      // Share the file system with the parent.
    | CLONE_FILES   // Share the files with the parent.
    | CLONE_SIGHAND // Share the signal handlers with the parent.
    | CLONE_THREAD  // Same thread group as the parent.
    | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
                    // values
    | CLONE_PARENT_SETTID   // Set child thread ID in |ptid| of the parent.
    | CLONE_CHILD_CLEARTID  // Let the kernel clear the tid address
                            // wake the joining thread.
    | CLONE_SETTLS;         // Setup the thread pointer of the new thread.

// Register in which the clone syscall's return value lands; it must be read
// from a register (not the stack) because the child runs on a fresh stack.
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#define CLONE_RESULT_REGISTER "x0"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#define CLONE_RESULT_REGISTER "t0"
#elif defined(LIBC_TARGET_ARCH_IS_X86_64)
#define CLONE_RESULT_REGISTER "rax"
#else
#error "CLONE_RESULT_REGISTER not defined for your target architecture"
#endif
66
add_no_overflow(size_t lhs,size_t rhs)67 static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
68 if (lhs > SIZE_MAX - rhs)
69 return Error{EINVAL};
70 if (rhs > SIZE_MAX - lhs)
71 return Error{EINVAL};
72 return lhs + rhs;
73 }
74
round_to_page(size_t v)75 static constexpr ErrorOr<size_t> round_to_page(size_t v) {
76 auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
77 if (!vp_or_err)
78 return vp_or_err;
79
80 return vp_or_err.value() & -EXEC_PAGESIZE;
81 }
82
// Allocates a thread stack of |stacksize| usable bytes via mmap, preceded by
// a |guardsize|-byte PROT_NONE guard region (stacks are assumed to grow
// down). Returns a pointer to the usable stack (just past the guard region),
// or an errno value on failure.
LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {

  // Guard needs to be mapped with PROT_NONE
  int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
  auto size_or_err = add_no_overflow(stacksize, guardsize);
  if (!size_or_err)
    return Error{int(size_or_err.error())};
  size_t size = size_or_err.value();

  // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
  // future-proof.
  long mmap_result = LIBC_NAMESPACE::syscall_impl<long>(
      MMAP_SYSCALL_NUMBER,
      0, // No special address
      size, prot,
      MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
      -1,                          // Not backed by any file
      0                            // No offset
  );
  // The raw syscall reports failure as a small negative value.
  // NOTE(review): this treats a negative result in the top |size| bytes of
  // the address space as an errno, presumably on the assumption that no
  // valid mapping can land there — confirm against the raw mmap contract.
  if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
    return Error{int(-mmap_result)};

  if (guardsize) {
    // Give read/write permissions to actual stack.
    // TODO: We are assuming stack growsdown here.
    long result = LIBC_NAMESPACE::syscall_impl<long>(
        SYS_mprotect, mmap_result + guardsize, stacksize,
        PROT_READ | PROT_WRITE);

    if (result != 0)
      return Error{int(-result)};
  }
  // Skip past the guard region so callers get the usable stack directly.
  mmap_result += guardsize;
  return reinterpret_cast<void *>(mmap_result);
}
118
119 // This must always be inlined as we may be freeing the calling threads stack in
120 // which case a normal return from the top the stack would cause an invalid
121 // memory read.
122 [[gnu::always_inline]] LIBC_INLINE void
free_stack(void * stack,size_t stacksize,size_t guardsize)123 free_stack(void *stack, size_t stacksize, size_t guardsize) {
124 uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
125 stackaddr -= guardsize;
126 stack = reinterpret_cast<void *>(stackaddr);
127 LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, stack, stacksize + guardsize);
128 }
129
struct Thread;

// We align the start args to 16-byte boundary as we adjust the allocated
// stack memory with its size. We want the adjusted address to be at a
// 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
// If different architecture in future requires higher alignment, then we
// can add a platform specific alignment spec.
struct alignas(STACK_ALIGNMENT) StartArgs {
  ThreadAttributes *thread_attrib; // Thread state, stored on the new stack.
  ThreadRunner runner;             // User entry point to invoke.
  void *arg;                       // Argument passed to |runner|.
};
142
// This must always be inlined as we may be freeing the calling threads stack
// in which case a normal return from the top the stack would cause an invalid
// memory read.
//
// Releases a thread's TLS image and, if the stack was allocated by
// Thread::run (owned_stack), its stack mapping as well.
[[gnu::always_inline]] LIBC_INLINE void
cleanup_thread_resources(ThreadAttributes *attrib) {
  // Cleanup the TLS before the stack as the TLS information is stored on
  // the stack.
  cleanup_tls(attrib->tls, attrib->tls_size);
  if (attrib->owned_stack)
    free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
}
154
// Returns the address on the new thread's stack at which Thread::run packed
// the StartArgs. The computation is architecture specific because it depends
// on what the frame pointer points at right after clone returns in the child
// (see the frame-pointer fixup in Thread::run).
[[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
  // NOTE: For __builtin_frame_address to work reliably across compilers,
  // architectures and various optimization levels, the TU including this file
  // should be compiled with -fno-omit-frame-pointer.
#ifdef LIBC_TARGET_ARCH_IS_X86_64
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
         // The x86_64 call instruction pushes resume address on to the stack.
         // Next, The x86_64 SysV ABI requires that the frame pointer be pushed
         // on to the stack. So, we have to step past two 64-bit values to get
         // to the start args.
         + sizeof(uintptr_t) * 2;
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
  // The frame pointer after cloning the new thread in the Thread::run method
  // is set to the stack pointer where start args are stored. So, we fetch
  // from there.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  // The current frame pointer is the previous stack pointer where the start
  // args are stored.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
#endif
}
177
// Entry point executed on the new thread's stack. Recovers the StartArgs
// packed by Thread::run, publishes this thread's attributes via |self|,
// invokes the user runner and exits the thread with its return value.
[[gnu::noinline]] void start_thread() {
  auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
  auto *attrib = start_args->thread_attrib;
  self.attrib = attrib;
  self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();

  // Record the runner's return value in the attributes (so join can read it)
  // before exiting, using the union member matching the thread style.
  if (attrib->style == ThreadStyle::POSIX) {
    attrib->retval.posix_retval =
        start_args->runner.posix_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
                ThreadStyle::POSIX);
  } else {
    attrib->retval.stdc_retval =
        start_args->runner.stdc_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
                ThreadStyle::STDC);
  }
}
196
// Spawns a new thread via the clone syscall.
// - style: POSIX vs STDC runner and return-value semantics.
// - runner/arg: thread entry point and its argument.
// - stack/stacksize/guardsize: user-provided stack, or nullptr to have this
//   method mmap one (with an optional PROT_NONE guard region).
// - detached: initial detach state of the new thread.
// Returns 0 on success, or an errno value on failure.
int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
                size_t stacksize, size_t guardsize, bool detached) {
  bool owned_stack = false;
  if (stack == nullptr) {
    // TODO: Should we return EINVAL here? Should we have a generic concept of a
    // minimum stacksize (like 16384 for pthread).
    if (stacksize == 0)
      stacksize = DEFAULT_STACKSIZE;
    // Roundup stacksize/guardsize to page size.
    // TODO: Should be also add sizeof(ThreadAttribute) and other internal
    // meta data?
    auto round_or_err = round_to_page(guardsize);
    if (!round_or_err)
      return round_or_err.error();
    guardsize = round_or_err.value();

    round_or_err = round_to_page(stacksize);
    if (!round_or_err)
      return round_or_err.error();

    stacksize = round_or_err.value();
    auto alloc = alloc_stack(stacksize, guardsize);
    if (!alloc)
      return alloc.error();
    else
      stack = alloc.value();
    owned_stack = true;
  }

  // Validate that stack/stacksize are validly aligned.
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  if ((stackaddr % STACK_ALIGNMENT != 0) ||
      ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return EINVAL;
  }

  TLSDescriptor tls;
  init_tls(tls);

  // When the new thread is spawned by the kernel, the new thread gets the
  // stack we pass to the clone syscall. However, this stack is empty and does
  // not have any local vars present in this function. Hence, one cannot
  // pass arguments to the thread start function, or use any local vars from
  // here. So, we pack them into the new stack from where the thread can sniff
  // them out.
  //
  // Likewise, the actual thread state information is also stored on the
  // stack memory.

  static constexpr size_t INTERNAL_STACK_DATA_SIZE =
      sizeof(StartArgs) + sizeof(ThreadAttributes) + sizeof(Futex);

  // This is pretty arbitrary, but at the moment we don't adjust user provided
  // stacksize (or default) to account for this data as its assumed minimal. If
  // this assert starts failing we probably should. Likewise if we can't bound
  // this we may overflow when we subtract it from the top of the stack.
  static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);

  // TODO: We are assuming stack growsdown here.
  auto adjusted_stack_or_err =
      add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
  if (!adjusted_stack_or_err) {
    cleanup_tls(tls.addr, tls.size);
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return adjusted_stack_or_err.error();
  }

  // Carve the internal data out of the top of the stack (stacks grow down)
  // and re-align; StartArgs is alignas(STACK_ALIGNMENT), so the child's
  // initial stack pointer stays ABI-aligned.
  uintptr_t adjusted_stack =
      adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
  adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);

  auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);

  // The thread attributes live on the child's stack, right after StartArgs.
  attrib =
      reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
  attrib->style = style;
  attrib->detach_state =
      uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
  attrib->stack = stack;
  attrib->stacksize = stacksize;
  attrib->guardsize = guardsize;
  attrib->owned_stack = owned_stack;
  attrib->tls = tls.addr;
  attrib->tls_size = tls.size;

  start_args->thread_attrib = attrib;
  start_args->runner = runner;
  start_args->arg = arg;

  // The clear-tid futex follows the attributes. The kernel zeroes and wakes
  // it when the child exits (CLONE_CHILD_CLEARTID), unblocking wait().
  auto clear_tid = reinterpret_cast<Futex *>(
      adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
  clear_tid->set(CLEAR_TID_VALUE);
  attrib->platform_data = clear_tid;

  // The clone syscall takes arguments in an architecture specific order.
  // Also, we want the result of the syscall to be in a register as the child
  // thread gets a completely different stack after it is created. The stack
  // variables from this function will not be availalbe to the child thread.
#if defined(LIBC_TARGET_ARCH_IS_X86_64)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,    // The address where the child tid is written
      &clear_tid->val, // The futex where the child thread status is signalled
      tls.tp           // The thread pointer value for the new thread.
  );
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                 \
    defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,   // The address where the child tid is written
      tls.tp,         // The thread pointer value for the new thread.
      &clear_tid->val // The futex where the child thread status is signalled
  );
#else
#error "Unsupported architecture for the clone syscall."
#endif

  if (clone_result == 0) {
    // Child thread: fix up the frame pointer so start_thread can locate the
    // start args from the current stack pointer, then enter the runner.
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
    // We set the frame pointer to be the same as the "sp" so that start args
    // can be sniffed out from start_thread.
#ifdef __clang__
    // GCC does not currently implement __arm_wsr64/__arm_rsr64.
    __arm_wsr64("x29", __arm_rsr64("sp"));
#else
    asm volatile("mov x29, sp");
#endif
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
    asm volatile("mv fp, sp");
#endif
    start_thread();
  } else if (clone_result < 0) {
    // Parent thread, clone failed: release TLS and (if owned) the stack.
    cleanup_thread_resources(attrib);
    return static_cast<int>(-clone_result);
  }

  return 0;
}
340
// Blocks until this thread exits, copies its return value into |retval| and
// releases the thread's resources. Returns 0.
int Thread::join(ThreadReturnValue &retval) {
  wait();

  // Read back the value start_thread recorded, using the union member that
  // matches the thread's style.
  if (attrib->style == ThreadStyle::POSIX)
    retval.posix_retval = attrib->retval.posix_retval;
  else
    retval.stdc_retval = attrib->retval.stdc_retval;

  cleanup_thread_resources(attrib);

  return 0;
}
353
// Marks this thread as detached. If the thread is already exiting, waits for
// it to finish and releases its resources instead. Returns a DetachType value
// telling the caller which of the two happened.
int Thread::detach() {
  // Atomically flip JOINABLE -> DETACHED; if this succeeds the thread will
  // clean up after itself in thread_exit.
  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::DETACHED))) {
    return int(DetachType::SIMPLE);
  }

  // If the thread was already detached, then the detach method should not
  // be called at all. If the thread is exiting, then we wait for it to exit
  // and free up resources.
  wait();

  cleanup_thread_resources(attrib);

  return int(DetachType::CLEANUP);
}
370
// Blocks until this thread terminates, i.e. until the kernel zeroes the
// clear-tid futex that was registered via CLONE_CHILD_CLEARTID.
void Thread::wait() {
  // The kernel should set the value at the clear tid address to zero.
  // If not, it is a spurious wake and we should continue to wait on
  // the futex.
  auto *clear_tid = reinterpret_cast<Futex *>(attrib->platform_data);
  // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
  // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
  while (clear_tid->load() != 0)
    clear_tid->wait(CLEAR_TID_VALUE, cpp::nullopt, true);
}
381
// Two Thread objects compare equal when they refer to the same thread id.
bool Thread::operator==(const Thread &thread) const {
  return attrib->tid == thread.attrib->tid;
}
385
// Pieces of the "/proc/self/task/<tid>/comm" path used to get or set the
// name of a thread other than the current one.
static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
static constexpr size_t THREAD_NAME_PATH_SIZE =
    THREAD_NAME_PATH_PREFIX.size() +
    IntegerToString<int>::buffer_size() + // Size of tid
    1 +                                   // For '/' character
    5; // For the file name "comm" and the nullterminator.
392
construct_thread_name_file_path(cpp::StringStream & stream,int tid)393 static void construct_thread_name_file_path(cpp::StringStream &stream,
394 int tid) {
395 stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
396 << cpp::StringStream::ENDS;
397 }
398
set_name(const cpp::string_view & name)399 int Thread::set_name(const cpp::string_view &name) {
400 if (name.size() >= NAME_SIZE_MAX)
401 return ERANGE;
402
403 if (*this == self) {
404 // If we are setting the name of the current thread, then we can
405 // use the syscall to set the name.
406 int retval =
407 LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_SET_NAME, name.data());
408 if (retval < 0)
409 return -retval;
410 else
411 return 0;
412 }
413
414 char path_name_buffer[THREAD_NAME_PATH_SIZE];
415 cpp::StringStream path_stream(path_name_buffer);
416 construct_thread_name_file_path(path_stream, attrib->tid);
417 #ifdef SYS_open
418 int fd =
419 LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDWR);
420 #else
421 int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
422 path_name_buffer, O_RDWR);
423 #endif
424 if (fd < 0)
425 return -fd;
426
427 int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_write, fd, name.data(),
428 name.size());
429 LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
430
431 if (retval < 0)
432 return -retval;
433 else if (retval != int(name.size()))
434 return EIO;
435 else
436 return 0;
437 }
438
get_name(cpp::StringStream & name) const439 int Thread::get_name(cpp::StringStream &name) const {
440 if (name.bufsize() < NAME_SIZE_MAX)
441 return ERANGE;
442
443 char name_buffer[NAME_SIZE_MAX];
444
445 if (*this == self) {
446 // If we are getting the name of the current thread, then we can
447 // use the syscall to get the name.
448 int retval =
449 LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_GET_NAME, name_buffer);
450 if (retval < 0)
451 return -retval;
452 name << name_buffer << cpp::StringStream::ENDS;
453 return 0;
454 }
455
456 char path_name_buffer[THREAD_NAME_PATH_SIZE];
457 cpp::StringStream path_stream(path_name_buffer);
458 construct_thread_name_file_path(path_stream, attrib->tid);
459 #ifdef SYS_open
460 int fd =
461 LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDONLY);
462 #else
463 int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
464 path_name_buffer, O_RDONLY);
465 #endif
466 if (fd < 0)
467 return -fd;
468
469 int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_read, fd, name_buffer,
470 NAME_SIZE_MAX);
471 LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
472 if (retval < 0)
473 return -retval;
474 if (retval == NAME_SIZE_MAX)
475 return ERANGE;
476 if (name_buffer[retval - 1] == '\n')
477 name_buffer[retval - 1] = '\0';
478 else
479 name_buffer[retval] = '\0';
480 name << name_buffer << cpp::StringStream::ENDS;
481 return 0;
482 }

// Terminates the calling thread after running its atexit callbacks. For a
// detached thread this also frees the thread's resources — including the
// stack the caller is currently running on — so nothing stack-resident may
// be touched once cleanup has happened.
void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
  auto attrib = self.attrib;

  // The very first thing we do is to call the thread's atexit callbacks.
  // These callbacks could be the ones registered by the language runtimes,
  // for example, the destructors of thread local objects. They can also
  // be destructors of the TSS objects set using API like pthread_setspecific.
  // NOTE: We cannot call the atexit callbacks as part of the
  // cleanup_thread_resources function as that function can be called from a
  // different thread. The destructors of thread local and TSS objects should
  // be called by the thread which owns them.
  internal::call_atexit_callbacks(attrib);

  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (!attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::EXITING))) {
    // Thread is detached so cleanup the resources.
    cleanup_thread_resources(attrib);

    // Set the CLEAR_TID address to nullptr to prevent the kernel
    // from signalling at a non-existent futex location.
    LIBC_NAMESPACE::syscall_impl<long>(SYS_set_tid_address, 0);
    // Return value for detached thread should be unused. We need to avoid
    // referencing `style` or `retval.*` because they may be stored on the stack
    // and we have deallocated our stack!
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 0);
    __builtin_unreachable();
  }

  // Joinable thread: the stack is still alive (the joiner frees it), so it
  // is safe to read |style| and |retval| here.
  if (style == ThreadStyle::POSIX)
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.posix_retval);
  else
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.stdc_retval);
  __builtin_unreachable();
}
519
520 } // namespace LIBC_NAMESPACE
521