/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <pthread.h>

#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/random.h>
#include <unistd.h>

#include "pthread_internal.h"

#include <async_safe/log.h>

#include "private/bionic_constants.h"
#include "private/bionic_defs.h"
#include "private/bionic_globals.h"
#include "private/bionic_macros.h"
#include "private/bionic_ssp.h"
#include "private/bionic_systrace.h"
#include "private/bionic_tls.h"
#include "private/ErrnoRestorer.h"

// x86 uses segment descriptors rather than a direct pointer to TLS.
#if defined(__i386__)
#include <asm/ldt.h>
void __init_user_desc(struct user_desc*, bool, void*);
#endif

// This code is used both by each new pthread and the code that initializes the main thread.
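// These functions can run before the stack guard TLS slot has been initialized
// (that's exactly what __init_tcb_stack_guard does), so they're built without
// stack protector instrumentation: an instrumented epilogue would check a
// guard value that doesn't exist yet.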
__attribute__((no_stack_protector))
void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
#ifdef TLS_SLOT_SELF
  // On x86, slot 0 must point to itself so code can read the thread pointer by
  // loading %fs:0 or %gs:0.
  tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
#endif
  tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
}

__attribute__((no_stack_protector))
void __init_tcb_stack_guard(bionic_tcb* tcb) {
  // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
  tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
}

__attribute__((no_stack_protector))
void __init_tcb_dtv(bionic_tcb* tcb) {
  // Initialize the DTV slot to a statically-allocated empty DTV. The first
  // access to a dynamic TLS variable allocates a new DTV.
  static const TlsDtv zero_dtv = {};
  __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
}

void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
  tcb->thread()->bionic_tls = tls;
  tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
}

// Allocate a temporary bionic_tls that the dynamic linker's main thread can
// use while it's loading the initial set of ELF modules.
bionic_tls* __allocate_temp_bionic_tls() {
  size_t allocation_size = __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE);
  void* allocation = mmap(nullptr, allocation_size,
                          PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS,
                          -1, 0);
  if (allocation == MAP_FAILED) {
    // Avoid strerror because it might need bionic_tls.
    async_safe_fatal("failed to allocate bionic_tls: error %d", errno);
  }
  return static_cast<bionic_tls*>(allocation);
}

void __free_temp_bionic_tls(bionic_tls* tls) {
  munmap(tls, __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE));
}

static void __init_alternate_signal_stack(pthread_internal_t* thread) {
  // Create and set an alternate signal stack.
  void* stack_base = mmap(nullptr, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
  if (stack_base != MAP_FAILED) {
    // Create a guard to catch stack overflows in signal handlers.
    if (mprotect(stack_base, PTHREAD_GUARD_SIZE, PROT_NONE) == -1) {
      munmap(stack_base, SIGNAL_STACK_SIZE);
      return;
    }
    stack_t ss;
    ss.ss_sp = reinterpret_cast<uint8_t*>(stack_base) + PTHREAD_GUARD_SIZE;
    ss.ss_size = SIGNAL_STACK_SIZE - PTHREAD_GUARD_SIZE;
    ss.ss_flags = 0;
    sigaltstack(&ss, nullptr);
    thread->alternate_signal_stack = stack_base;

    // We can only use a statically allocated string for the mapped region name: the Android
    // kernel keeps the string pointer and reads it directly when dumping /proc/pid/maps,
    // where this region shows up as "[anon:thread signal stack]".
    prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack");
  }
}

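// A shadow call stack is a second, separately-located stack that clang's
// -fsanitize=shadow-call-stack instrumentation uses to spill return addresses,
// so that an overflow or overwrite of the ordinary stack can't redirect
// control flow. On aarch64 the shadow call stack pointer lives in x18, a
// register the Android ABI reserves for this purpose.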
static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
#ifdef __aarch64__
  // Allocate the stack and the guard region. PROT_NONE for now; we only make
  // the chosen SCS_SIZE window accessible below.
  char* scs_guard_region = reinterpret_cast<char*>(
      mmap(nullptr, SCS_GUARD_REGION_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0));
  thread->shadow_call_stack_guard_region = scs_guard_region;

  // The address is aligned to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
  // in jmp_buf.
  char* scs_aligned_guard_region =
      reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));

  // We need to ensure that [scs_offset, scs_offset + SCS_SIZE) is in the guard region and that
  // there is at least one unmapped page after the shadow call stack (to catch stack overflows).
  // We can't use arc4random_uniform in init because /dev/urandom might not have been created yet.
  size_t scs_offset =
      (getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE - 1) * SCS_SIZE);

  // Make the stack readable and writable and store its address in register x18. This is
  // deliberately the only place where the address is stored.
  char* scs = scs_aligned_guard_region + scs_offset;
  mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE);
  __asm__ __volatile__("mov x18, %0" ::"r"(scs));
#endif
}

void __init_additional_stacks(pthread_internal_t* thread) {
  __init_alternate_signal_stack(thread);
  __init_shadow_call_stack(thread);
}

int __init_thread(pthread_internal_t* thread) {
  thread->cleanup_stack = nullptr;

  if (__predict_true((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) == 0)) {
    atomic_init(&thread->join_state, THREAD_NOT_JOINED);
  } else {
    atomic_init(&thread->join_state, THREAD_DETACHED);
  }

  // Set the scheduling policy/priority of the thread if necessary.
  bool need_set = true;
  int policy;
  sched_param param;
  if ((thread->attr.flags & PTHREAD_ATTR_FLAG_INHERIT) != 0) {
    // Unless the parent has SCHED_RESET_ON_FORK set, we've already inherited from the parent.
    // (sched_getscheduler reports that flag ORed into the returned policy.)
    policy = sched_getscheduler(0);
    need_set = ((policy & SCHED_RESET_ON_FORK) != 0);
    if (need_set) {
      if (policy == -1) {
        async_safe_format_log(ANDROID_LOG_WARN, "libc",
                              "pthread_create sched_getscheduler failed: %s", strerror(errno));
        return errno;
      }
      // Strip the flag before re-applying the policy to the new thread.
      policy &= ~SCHED_RESET_ON_FORK;
      if (sched_getparam(0, &param) == -1) {
        async_safe_format_log(ANDROID_LOG_WARN, "libc",
                              "pthread_create sched_getparam failed: %s", strerror(errno));
        return errno;
      }
    }
  } else {
    policy = thread->attr.sched_policy;
    param.sched_priority = thread->attr.sched_priority;
  }
  // Backwards compatibility: before P, Android didn't have pthread_attr_setinheritsched,
  // and our behavior was neither of the POSIX behaviors.
  if ((thread->attr.flags & (PTHREAD_ATTR_FLAG_INHERIT|PTHREAD_ATTR_FLAG_EXPLICIT)) == 0) {
    need_set = (thread->attr.sched_policy != SCHED_NORMAL);
  }
  if (need_set) {
    if (sched_setscheduler(thread->tid, policy, &param) == -1) {
      async_safe_format_log(ANDROID_LOG_WARN, "libc",
                            "pthread_create sched_setscheduler(%d, {%d}) call failed: %s", policy,
                            param.sched_priority, strerror(errno));
#if defined(__LP64__)
      // For backwards compatibility reasons, we only report failures on 64-bit devices.
      return errno;
#endif
    }
  }

  return 0;
}


// Allocate a thread's primary mapping. This mapping includes static TLS and
// optionally a stack. Static TLS includes ELF TLS segments and the bionic_tls
// struct.
//
// stack_guard_size must be a multiple of PAGE_SIZE.
ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size) {
  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
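  // The layout is computed during libc/linker startup and is fixed by the time
  // threads can be created, so it's safe to read here without synchronization.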

  // Allocate in order: stack guard, stack, static TLS, guard page.
  size_t mmap_size;
  if (__builtin_add_overflow(stack_size, stack_guard_size, &mmap_size)) return {};
  if (__builtin_add_overflow(mmap_size, layout.size(), &mmap_size)) return {};
  if (__builtin_add_overflow(mmap_size, PTHREAD_GUARD_SIZE, &mmap_size)) return {};

  // Align the result to a page size.
  const size_t unaligned_size = mmap_size;
  mmap_size = __BIONIC_ALIGN(mmap_size, PAGE_SIZE);
  if (mmap_size < unaligned_size) return {};

  // Create a new private anonymous map. Make the entire mapping PROT_NONE, then carve out a
  // read+write area in the middle.
  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
  char* const space = static_cast<char*>(mmap(nullptr, mmap_size, PROT_NONE, flags, -1, 0));
  if (space == MAP_FAILED) {
    async_safe_format_log(ANDROID_LOG_WARN,
                          "libc",
                          "pthread_create failed: couldn't allocate %zu-bytes mapped space: %s",
                          mmap_size, strerror(errno));
    return {};
  }
  const size_t writable_size = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
  if (mprotect(space + stack_guard_size,
               writable_size,
               PROT_READ | PROT_WRITE) != 0) {
    async_safe_format_log(ANDROID_LOG_WARN, "libc",
                          "pthread_create failed: couldn't mprotect R+W %zu-byte thread mapping region: %s",
                          writable_size, strerror(errno));
    munmap(space, mmap_size);
    return {};
  }

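  // At this point the mapping looks like this, from low to high address:
  //
  //   space .. space+stack_guard_size               PROT_NONE stack guard
  //   space+stack_guard_size .. static_tls          read+write stack (stack_top grows down)
  //   static_tls .. static_tls+layout.size()        read+write static TLS
  //   static_tls+layout.size() .. space+mmap_size   PROT_NONE guard page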
  ThreadMapping result = {};
  result.mmap_base = space;
  result.mmap_size = mmap_size;
  result.static_tls = space + mmap_size - PTHREAD_GUARD_SIZE - layout.size();
  result.stack_base = space;
  result.stack_top = result.static_tls;
  return result;
}

static int __allocate_thread(pthread_attr_t* attr, bionic_tcb** tcbp, void** child_stack) {
  ThreadMapping mapping;
  char* stack_top;
  bool stack_clean = false;

  if (attr->stack_base == nullptr) {
    // The caller didn't provide a stack, so allocate one.

    // Make sure the guard size is a multiple of PAGE_SIZE.
    const size_t unaligned_guard_size = attr->guard_size;
    attr->guard_size = __BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
    if (attr->guard_size < unaligned_guard_size) return EAGAIN;

    mapping = __allocate_thread_mapping(attr->stack_size, attr->guard_size);
    if (mapping.mmap_base == nullptr) return EAGAIN;

    stack_top = mapping.stack_top;
    attr->stack_base = mapping.stack_base;
    stack_clean = true;
  } else {
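    // Even with a caller-provided stack, the thread still needs static TLS, so
    // allocate a minimal mapping holding just static TLS and its guard page.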
    mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
    if (mapping.mmap_base == nullptr) return EAGAIN;

    stack_top = static_cast<char*>(attr->stack_base) + attr->stack_size;
  }

  // Carve out space from the stack for the thread's pthread_internal_t. This
  // memory isn't counted in pthread_attr_getstacksize.

  // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
  stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);

  pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
  if (!stack_clean) {
    // If thread was not allocated by mmap(), it may not have been cleared to zero.
    // So assume the worst and zero it.
    memset(thread, 0, sizeof(pthread_internal_t));
  }

  // Locate static TLS structures within the mapped region.
  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
  auto tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
  auto tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());

  // Initialize TLS memory.
  __init_static_tls(mapping.static_tls);
  __init_tcb(tcb, thread);
  __init_tcb_dtv(tcb);
  __init_tcb_stack_guard(tcb);
  __init_bionic_tls_ptrs(tcb, tls);

  attr->stack_size = stack_top - static_cast<char*>(attr->stack_base);
  thread->attr = *attr;
  thread->mmap_base = mapping.mmap_base;
  thread->mmap_size = mapping.mmap_size;

  *tcbp = tcb;
  *child_stack = stack_top;
  return 0;
}

__attribute__((no_sanitize("hwaddress")))
static int __pthread_start(void* arg) {
  pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);

  __hwasan_thread_enter();

  // Wait for our creating thread to release us. This lets it have time to
  // notify gdb about this thread before we start doing anything.
  // This also provides the memory barrier needed to ensure that all memory
  // accesses previously made by the creating thread are visible to us.
  thread->startup_handshake_lock.lock();

  __init_additional_stacks(thread);

  void* result = thread->start_routine(thread->start_routine_arg);
  pthread_exit(result);

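  // pthread_exit never returns; this return exists only to satisfy clone's
  // int-returning start-function signature.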
  return 0;
}

// A dummy start routine for pthread_create failures where we've created a thread but aren't
// going to run user code on it. We swap out the user's start routine for this and take advantage
// of the regular thread teardown to free up resources.
static void* __do_nothing(void*) {
  return nullptr;
}


__BIONIC_WEAK_FOR_NATIVE_BRIDGE
int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
                   void* (*start_routine)(void*), void* arg) {
  ErrnoRestorer errno_restorer;

  pthread_attr_t thread_attr;
  ScopedTrace trace("pthread_create");
  if (attr == nullptr) {
    pthread_attr_init(&thread_attr);
  } else {
    thread_attr = *attr;
    attr = nullptr; // Prevent misuse below.
  }

  bionic_tcb* tcb = nullptr;
  void* child_stack = nullptr;
  int result = __allocate_thread(&thread_attr, &tcb, &child_stack);
  if (result != 0) {
    return result;
  }

  pthread_internal_t* thread = tcb->thread();

  // Create a lock for the thread to wait on once it starts so we can keep
  // it from doing anything until after we notify the debugger about it
  //
  // This also provides the memory barrier we need to ensure that all
  // memory accesses previously performed by this thread are visible to
  // the new thread.
  thread->startup_handshake_lock.init(false);
  thread->startup_handshake_lock.lock();

  thread->start_routine = start_routine;
  thread->start_routine_arg = arg;

  thread->set_cached_pid(getpid());

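  // The clone flags make the child a thread in the POSIX sense: it shares the
  // address space (CLONE_VM), filesystem attributes (CLONE_FS), the fd table
  // (CLONE_FILES), signal handlers (CLONE_SIGHAND), the thread group
  // (CLONE_THREAD), and System V semaphore undo lists (CLONE_SYSVSEM).
  // CLONE_SETTLS installs the new thread's TLS, CLONE_PARENT_SETTID stores the
  // new tid into &thread->tid before clone returns, and CLONE_CHILD_CLEARTID
  // makes the kernel zero thread->tid and wake its futex when the thread
  // exits, which is what pthread_join waits for.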
  int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
              CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
  void* tls = &tcb->tls_slot(0);
#if defined(__i386__)
  // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
  // a pointer to the TLS itself.
  user_desc tls_descriptor;
  __init_user_desc(&tls_descriptor, false, tls);
  tls = &tls_descriptor;
#endif
  int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid));
  if (rc == -1) {
    int clone_errno = errno;
    // We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to
    // be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
    // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
    thread->startup_handshake_lock.unlock();
    if (thread->mmap_size != 0) {
      munmap(thread->mmap_base, thread->mmap_size);
    }
    async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s",
                          strerror(clone_errno));
    return clone_errno;
  }

  int init_errno = __init_thread(thread);
  if (init_errno != 0) {
    // Mark the thread detached and replace its start_routine with a no-op.
    // Letting the thread run is the easiest way to clean up its resources.
    atomic_store(&thread->join_state, THREAD_DETACHED);
    __pthread_internal_add(thread);
    thread->start_routine = __do_nothing;
    thread->startup_handshake_lock.unlock();
    return init_errno;
  }

  // Publish the pthread_t and unlock the mutex to let the new thread start running.
  *thread_out = __pthread_internal_add(thread);
  thread->startup_handshake_lock.unlock();

  return 0;
}
