/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <pthread.h>

#include <errno.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <unistd.h>

#include "pthread_internal.h"

#include "private/bionic_constants.h"
#include "private/bionic_fortify.h"
#include "private/bionic_futex.h"
#include "private/bionic_sdk_version.h"
#include "private/bionic_systrace.h"
#include "private/bionic_time_conversions.h"
#include "private/bionic_tls.h"

/* a mutex attribute holds the following fields
 *
 * bits:   name       description
 * 0-3     type       type of mutex
 * 4       shared     process-shared flag
 * 5       protocol   whether it is a priority inheritance mutex
 */
#define MUTEXATTR_TYPE_MASK      0x000f
#define MUTEXATTR_SHARED_MASK    0x0010
#define MUTEXATTR_PROTOCOL_MASK  0x0020

#define MUTEXATTR_PROTOCOL_SHIFT 5
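
// Editorial sketch (not in the original source): under the layout above, an
// attribute value composes as type | shared | (protocol << shift). Assuming
// bionic's PTHREAD_MUTEX_RECURSIVE == 1 and PTHREAD_PRIO_INHERIT == 1, a
// recursive, process-shared, priority inheritance attribute encodes as 0x31:
static_assert((PTHREAD_MUTEX_RECURSIVE | MUTEXATTR_SHARED_MASK |
               (PTHREAD_PRIO_INHERIT << MUTEXATTR_PROTOCOL_SHIFT)) == 0x31,
              "example attribute encoding");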

int pthread_mutexattr_init(pthread_mutexattr_t *attr)
{
    *attr = PTHREAD_MUTEX_DEFAULT;
    return 0;
}

int pthread_mutexattr_destroy(pthread_mutexattr_t *attr)
{
    *attr = -1;
    return 0;
}

int pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type_p)
{
    int type = (*attr & MUTEXATTR_TYPE_MASK);

    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *type_p = type;
    return 0;
}

int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type)
{
    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *attr = (*attr & ~MUTEXATTR_TYPE_MASK) | type;
    return 0;
}

/* process-shared mutexes are not supported at the moment */

int pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
{
    switch (pshared) {
    case PTHREAD_PROCESS_PRIVATE:
        *attr &= ~MUTEXATTR_SHARED_MASK;
        return 0;

    case PTHREAD_PROCESS_SHARED:
        /* our current implementation of pthread actually supports shared
         * mutexes but won't clean up if a process dies with the mutex held.
         * Nevertheless, it's better than nothing. Shared mutexes are used
         * by surfaceflinger and audioflinger.
         */
        *attr |= MUTEXATTR_SHARED_MASK;
        return 0;
    }
    return EINVAL;
}

int pthread_mutexattr_getpshared(const pthread_mutexattr_t* attr, int* pshared) {
    *pshared = (*attr & MUTEXATTR_SHARED_MASK) ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE;
    return 0;
}

int pthread_mutexattr_setprotocol(pthread_mutexattr_t* attr, int protocol) {
    if (protocol != PTHREAD_PRIO_NONE && protocol != PTHREAD_PRIO_INHERIT) {
        return EINVAL;
    }
    *attr = (*attr & ~MUTEXATTR_PROTOCOL_MASK) | (protocol << MUTEXATTR_PROTOCOL_SHIFT);
    return 0;
}

int pthread_mutexattr_getprotocol(const pthread_mutexattr_t* attr, int* protocol) {
    *protocol = (*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT;
    return 0;
}

// Priority Inheritance mutex implementation
struct PIMutex {
    // mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck), constant during lifetime
    uint8_t type;
    // process-shared flag, constant during lifetime
    bool shared;
    // <number of times a thread holding a recursive PI mutex> - 1
    uint16_t counter;
    // owner_tid is read/written by both userspace code and kernel code. It includes three fields:
    // FUTEX_WAITERS, FUTEX_OWNER_DIED and FUTEX_TID_MASK.
    atomic_int owner_tid;
};
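
// Editorial note (an assumption about the kernel's PI futex protocol, not
// stated in this file): for example, owner_tid == (FUTEX_WAITERS | 1234)
// would mean the thread with tid 1234 holds the mutex and at least one
// waiter is queued in the kernel, so the unlock path must go through the
// kernel rather than a plain userspace compare-and-swap.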

static inline __always_inline int PIMutexTryLock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    // Handle common case first.
    int old_owner = 0;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                               &old_owner, tid,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        return 0;
    }
    if (tid == (old_owner & FUTEX_TID_MASK)) {
        // We already own this mutex.
        if (mutex.type == PTHREAD_MUTEX_NORMAL) {
            return EBUSY;
        }
        if (mutex.type == PTHREAD_MUTEX_ERRORCHECK) {
            return EDEADLK;
        }
        if (mutex.counter == 0xffff) {
            return EAGAIN;
        }
        mutex.counter++;
        return 0;
    }
    return EBUSY;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM/ARM64, adding up to 20 percent overhead. So make it noinline.
static int __attribute__((noinline)) PIMutexTimedLock(PIMutex& mutex,
                                                      bool use_realtime_clock,
                                                      const timespec* abs_timeout) {
    int ret = PIMutexTryLock(mutex);
    if (__predict_true(ret == 0)) {
        return 0;
    }
    if (ret == EBUSY) {
        ScopedTrace trace("Contending for pthread mutex");
        ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, use_realtime_clock, abs_timeout);
    }
    return ret;
}

static int PIMutexUnlock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    int old_owner = tid;
    // Handle common case first.
    if (__predict_true(mutex.type == PTHREAD_MUTEX_NORMAL)) {
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    }

    if (tid != (old_owner & FUTEX_TID_MASK)) {
        // The mutex can only be unlocked by the thread that owns it.
        return EPERM;
    }
    if (mutex.type == PTHREAD_MUTEX_RECURSIVE) {
        if (mutex.counter != 0u) {
            --mutex.counter;
            return 0;
        }
    }
    if (old_owner == tid) {
        // No thread is waiting.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    }
    return -__futex_pi_unlock(&mutex.owner_tid, mutex.shared);
}

static int PIMutexDestroy(PIMutex& mutex) {
    // The mutex should be in unlocked state (owner_tid == 0) when destroyed.
    // Store 0xffffffff to make the mutex unusable.
    int old_owner = 0;
    if (atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0xffffffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)

namespace PIMutexAllocator {
// pthread_mutex_t has only 4 bytes in 32-bit programs, which is not enough to hold a PIMutex.
// So we use malloc to allocate PIMutexes and use a 16-bit field of pthread_mutex_t as an index
// to find the allocated PIMutex. This allows at most 65536 PI mutexes.
// When calling operations like pthread_mutex_lock/unlock, the 16-bit index is mapped to the
// corresponding PIMutex. To make the map operation fast, we use a lockless mapping method:
// once a PIMutex is allocated, none of the data used to map its index to the PIMutex is changed
// until it is destroyed.
// Below are the data structures:
//   // struct Node contains a PIMutex.
//   typedef Node NodeArray[256];
//   typedef NodeArray* NodeArrayP;
//   NodeArrayP nodes[256];
//
// A 16-bit index is mapped to a Node as below:
//   (*nodes[index >> 8])[index & 0xff]
//
// We also use a free list to allow O(1) lookup of recycled PIMutexes.
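//
// For example (editorial illustration), id 0x1234 maps to (*nodes[0x12])[0x34]:
// the high byte selects the NodeArray and the low byte selects the slot in it.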

union Node {
    PIMutex mutex;
    int next_free_id;  // If not -1, refers to the next node in the free PIMutex list.
};
typedef Node NodeArray[256];
typedef NodeArray* NodeArrayP;

// lock protects the items below.
static Lock lock;
static NodeArrayP* nodes;
static int next_to_alloc_id;
static int first_free_id = -1;  // If not -1, refers to the first node in the free PIMutex list.

static inline __always_inline Node& IdToNode(int id) {
    return (*nodes[id >> 8])[id & 0xff];
}

static inline __always_inline PIMutex& IdToPIMutex(int id) {
    return IdToNode(id).mutex;
}

static int AllocIdLocked() {
    if (first_free_id != -1) {
        int result = first_free_id;
        first_free_id = IdToNode(result).next_free_id;
        return result;
    }
    if (next_to_alloc_id >= 0x10000) {
        return -1;
    }
    int array_pos = next_to_alloc_id >> 8;
    int node_pos = next_to_alloc_id & 0xff;
    if (node_pos == 0) {
        if (array_pos == 0) {
            nodes = static_cast<NodeArray**>(calloc(256, sizeof(NodeArray*)));
            if (nodes == nullptr) {
                return -1;
            }
        }
        nodes[array_pos] = static_cast<NodeArray*>(malloc(sizeof(NodeArray)));
        if (nodes[array_pos] == nullptr) {
            return -1;
        }
    }
    return next_to_alloc_id++;
}

// On success, returns an id referring to a PIMutex; otherwise returns -1.
// A valid id is in the range [0, 0xffff].
static int AllocId() {
    lock.lock();
    int result = AllocIdLocked();
    lock.unlock();
    if (result != -1) {
        memset(&IdToPIMutex(result), 0, sizeof(PIMutex));
    }
    return result;
}

static void FreeId(int id) {
    lock.lock();
    IdToNode(id).next_free_id = first_free_id;
    first_free_id = id;
    lock.unlock();
}

}  // namespace PIMutexAllocator

#endif  // !defined(__LP64__)


/* Convenience macro, creates a mask of 'bits' bits that starts from
 * the 'shift'-th least significant bit in a 32-bit word.
 *
 * Examples: FIELD_MASK(0,4)  -> 0xf
 *           FIELD_MASK(16,9) -> 0x1ff0000
 */
#define FIELD_MASK(shift,bits)           (((1 << (bits))-1) << (shift))

/* This one is used to create a bit pattern from a given field value */
#define FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits))-1)) << (shift))

/* And this one does the opposite, i.e. extracts a field's value from a bit pattern */
#define FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))
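
// Editorial sanity checks (not in the original source): these mirror the
// documented examples above and verify that TO/FROM round-trip.
static_assert(FIELD_MASK(0, 4) == 0xf, "FIELD_MASK example");
static_assert(FIELD_MASK(16, 9) == 0x1ff0000, "FIELD_MASK example");
static_assert(FIELD_FROM_BITS(FIELD_TO_BITS(5, 2, 11), 2, 11) == 5,
              "FIELD_TO_BITS/FIELD_FROM_BITS round-trip");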

/* Convenience macros.
 *
 * These are used to form or modify the bit pattern of a given mutex value
 */

/* Mutex state:
 *
 * 0 for unlocked
 * 1 for locked, no waiters
 * 2 for locked, maybe waiters
 */
#define MUTEX_STATE_SHIFT      0
#define MUTEX_STATE_LEN        2

#define MUTEX_STATE_MASK          FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define MUTEX_STATE_FROM_BITS(v)  FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define MUTEX_STATE_TO_BITS(v)    FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)

#define MUTEX_STATE_UNLOCKED            0  /* must be 0 to match PTHREAD_MUTEX_INITIALIZER */
#define MUTEX_STATE_LOCKED_UNCONTENDED  1  /* must be 1 due to atomic dec in unlock operation */
#define MUTEX_STATE_LOCKED_CONTENDED    2  /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */

#define MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
#define MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
#define MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)

// Return true iff the mutex is unlocked.
#define MUTEX_STATE_BITS_IS_UNLOCKED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_UNLOCKED)

// Return true iff the mutex is locked with no waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)

// Return true iff the mutex is locked with maybe waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)

/* used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
#define MUTEX_STATE_BITS_FLIP_CONTENTION(v)  ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))
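
// Editorial sanity check (not in the original source): flipping the contention
// bits on a locked_uncontended state yields locked_contended, since the macro
// XORs with the difference between the two encodings.
static_assert(MUTEX_STATE_BITS_FLIP_CONTENTION(MUTEX_STATE_BITS_LOCKED_UNCONTENDED) ==
                  MUTEX_STATE_BITS_LOCKED_CONTENDED,
              "FLIP_CONTENTION example");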

/* Mutex counter:
 *
 * We need to check for overflow before incrementing, and we also need to
 * detect when the counter is 0
 */
#define MUTEX_COUNTER_SHIFT  2
#define MUTEX_COUNTER_LEN    11
#define MUTEX_COUNTER_MASK   FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

#define MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)  (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
#define MUTEX_COUNTER_BITS_IS_ZERO(v)        (((v) & MUTEX_COUNTER_MASK) == 0)

/* Used to increment the counter directly after overflow has been checked */
#define MUTEX_COUNTER_BITS_ONE  FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

/* Mutex shared bit flag
 *
 * This flag is set to indicate that the mutex is shared among processes.
 * This changes the futex opcode we use for futex wait/wake operations
 * (non-shared operations are much faster).
 */
#define MUTEX_SHARED_SHIFT  13
#define MUTEX_SHARED_MASK   FIELD_MASK(MUTEX_SHARED_SHIFT, 1)

/* Mutex type:
 * We support normal, recursive and errorcheck mutexes.
 */
#define MUTEX_TYPE_SHIFT  14
#define MUTEX_TYPE_LEN    2
#define MUTEX_TYPE_MASK   FIELD_MASK(MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define MUTEX_TYPE_TO_BITS(t)  FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL)
#define MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE)
#define MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK)
// Use a special mutex type to mark priority inheritance mutexes.
#define PI_MUTEX_STATE  MUTEX_TYPE_TO_BITS(3)

// A PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   PIMutex pi_mutex;  // uint16_t pi_mutex_id in 32-bit programs
//
// state holds the following fields:
//
//   bits:   name     description
//   15-14   type     mutex type, should be 3
//   13-0    padding  should be 0
//
// pi_mutex holds the state of a PI mutex.
// pi_mutex_id holds an integer used to find the state of a PI mutex.
//
// A Non-PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   atomic_int owner_tid;  // Atomic(uint16_t) in 32-bit programs
//
// state holds the following fields:
//
//   bits:   name     description
//   15-14   type     mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck)
//   13      shared   process-shared flag
//   12-2    counter  <number of times a thread holding a recursive Non-PI mutex> - 1
//   1-0     state    lock state (0, 1 or 2)
//
// bits 15-13 are constant during the lifetime of the mutex.
//
// owner_tid is used only in recursive and errorcheck Non-PI mutexes to hold the mutex owner
// thread id.
//
// PI mutexes and Non-PI mutexes are distinguished by checking the type field in state.
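//
// Editorial worked example (not in the original source): under the layout
// above, a process-shared recursive Non-PI mutex that is held twice by its
// owner (counter == 1) and may have waiters would carry
//   state == MUTEX_TYPE_BITS_RECURSIVE | MUTEX_SHARED_MASK |
//            MUTEX_COUNTER_BITS_ONE | MUTEX_STATE_BITS_LOCKED_CONTENDED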
#if defined(__LP64__)
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    uint16_t __pad;
    union {
        atomic_int owner_tid;
        PIMutex pi_mutex;
    };
    char __reserved[28];

    PIMutex& ToPIMutex() {
        return pi_mutex;
    }

    void FreePIMutex() {
    }
} __attribute__((aligned(4)));

#else
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    union {
        _Atomic(uint16_t) owner_tid;
        uint16_t pi_mutex_id;
    };

    PIMutex& ToPIMutex() {
        return PIMutexAllocator::IdToPIMutex(pi_mutex_id);
    }

    void FreePIMutex() {
        PIMutexAllocator::FreeId(pi_mutex_id);
    }
} __attribute__((aligned(4)));
#endif

static_assert(sizeof(pthread_mutex_t) == sizeof(pthread_mutex_internal_t),
              "pthread_mutex_t should actually be pthread_mutex_internal_t in implementation.");

// For binary compatibility with old versions of pthread_mutex_t, we can't use stricter
// alignment than 4-byte alignment.
static_assert(alignof(pthread_mutex_t) == 4,
              "pthread_mutex_t should fulfill the alignment of pthread_mutex_internal_t.");

static inline pthread_mutex_internal_t* __get_internal_mutex(pthread_mutex_t* mutex_interface) {
    return reinterpret_cast<pthread_mutex_internal_t*>(mutex_interface);
}

int pthread_mutex_init(pthread_mutex_t* mutex_interface, const pthread_mutexattr_t* attr) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    memset(mutex, 0, sizeof(pthread_mutex_internal_t));

    if (__predict_true(attr == NULL)) {
        atomic_init(&mutex->state, MUTEX_TYPE_BITS_NORMAL);
        return 0;
    }

    uint16_t state = 0;
    if ((*attr & MUTEXATTR_SHARED_MASK) != 0) {
        state |= MUTEX_SHARED_MASK;
    }

    switch (*attr & MUTEXATTR_TYPE_MASK) {
    case PTHREAD_MUTEX_NORMAL:
        state |= MUTEX_TYPE_BITS_NORMAL;
        break;
    case PTHREAD_MUTEX_RECURSIVE:
        state |= MUTEX_TYPE_BITS_RECURSIVE;
        break;
    case PTHREAD_MUTEX_ERRORCHECK:
        state |= MUTEX_TYPE_BITS_ERRORCHECK;
        break;
    default:
        return EINVAL;
    }

    if (((*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT) == PTHREAD_PRIO_INHERIT) {
#if !defined(__LP64__)
        if (state & MUTEX_SHARED_MASK) {
            return EINVAL;
        }
        int id = PIMutexAllocator::AllocId();
        if (id == -1) {
            return ENOMEM;
        }
        mutex->pi_mutex_id = id;
#endif
        atomic_init(&mutex->state, PI_MUTEX_STATE);
        PIMutex& pi_mutex = mutex->ToPIMutex();
        pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK;
        pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0;
    } else {
        atomic_init(&mutex->state, state);
        atomic_init(&mutex->owner_tid, 0);
    }
    return 0;
}
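
// Editorial caller-side sketch (not part of bionic itself): how a priority
// inheritance mutex would typically be set up against this implementation.
//
//   pthread_mutexattr_t attr;
//   pthread_mutexattr_init(&attr);
//   pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
//   pthread_mutex_t m;
//   pthread_mutex_init(&m, &attr);   // state becomes PI_MUTEX_STATE
//   pthread_mutexattr_destroy(&attr);
//   pthread_mutex_lock(&m);          // dispatches to PIMutexTryLock/PIMutexTimedLock
//   pthread_mutex_unlock(&m);
//   pthread_mutex_destroy(&m);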

// namespace for Non-PI mutex routines.
namespace NonPI {

static inline __always_inline int NormalMutexTryLock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    uint16_t old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                         locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
        return 0;
    }
    return EBUSY;
}

/*
 * Lock a normal Non-PI mutex.
 *
 * As noted above, there are three states:
 *   0 (unlocked, no contention)
 *   1 (locked, no contention)
 *   2 (locked, contention)
 *
 * Non-recursive mutexes don't use the thread-id or counter fields, and the
 * "type" value is zero, so the only bits that will be set are the ones in
 * the lock state field.
 */
static inline __always_inline int NormalMutexLock(pthread_mutex_internal_t* mutex,
                                                  uint16_t shared,
                                                  bool use_realtime_clock,
                                                  const timespec* abs_timeout_or_null) {
    if (__predict_true(NormalMutexTryLock(mutex, shared) == 0)) {
        return 0;
    }
    int result = check_timespec(abs_timeout_or_null, true);
    if (result != 0) {
        return result;
    }

    ScopedTrace trace("Contending for pthread mutex");

    const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We want to go to sleep until the mutex is available, which requires
    // promoting it to locked_contended. We need to swap in the new state
    // and then wait until somebody wakes us up.
    // An atomic_exchange is used to compete with other threads for the lock.
    // If it returns unlocked, we have acquired the lock; otherwise another
    // thread still holds the lock and we should wait again.
    // If the lock is acquired, an acquire fence is needed to make all memory
    // accesses made by other threads visible to the current CPU.
    while (atomic_exchange_explicit(&mutex->state, locked_contended,
                                    memory_order_acquire) != unlocked) {
        if (__futex_wait_ex(&mutex->state, shared, locked_contended, use_realtime_clock,
                            abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
    }
    return 0;
}

/*
 * Release a normal Non-PI mutex. The caller is responsible for determining
 * that we are in fact the owner of this lock.
 */
static inline __always_inline void NormalMutexUnlock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We use an atomic_exchange to release the lock. If the locked_contended
    // state is returned, some thread is waiting for the lock and we need to
    // wake up one of them.
    // A release fence is required to make previous stores visible to the next
    // lock owner thread.
    if (atomic_exchange_explicit(&mutex->state, unlocked,
                                 memory_order_release) == locked_contended) {
        // Wake up one waiting thread. We don't know which thread will be
        // woken or when it'll start executing -- futexes make no guarantees
        // here. There may not even be a thread waiting.
        //
        // The newly-woken thread will replace the unlocked state we just set above
        // with locked_contended state, which means that when it eventually releases
        // the mutex it will also call FUTEX_WAKE. This results in one extra wake
        // call whenever a lock is contended, but lets us avoid forgetting anyone
        // without requiring us to track the number of sleepers.
        //
        // It's possible for another thread to sneak in and grab the lock between
        // the exchange above and the wake call below. If the new thread is "slow"
        // and holds the lock for a while, we'll wake up a sleeper, which will swap
        // in locked_uncontended state and then go back to sleep since the lock is
        // still held. If the new thread is "fast", running to completion before
        // we call wake, the thread we eventually wake will find an unlocked mutex
        // and will execute. Either way we have correct behavior and nobody is
        // orphaned on the wait queue.
        __futex_wake_ex(&mutex->state, shared, 1);
    }
}

/* This common inlined function is used to increment the counter of a recursive Non-PI mutex.
 *
 * If the counter overflows, it will return EAGAIN.
 * Otherwise, it atomically increments the counter and returns 0.
 */
static inline __always_inline int RecursiveIncrement(pthread_mutex_internal_t* mutex,
                                                     uint16_t old_state) {
    // Detect recursive lock overflow and return EAGAIN.
    // This is safe because only the owner thread can modify the
    // counter bits in the mutex value.
    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(old_state)) {
        return EAGAIN;
    }

    // Other threads are able to change the lower bits (e.g. promoting it to "contended"),
    // but the mutex counter will not overflow. So we use an atomic_fetch_add operation here.
    // The mutex is already locked by the current thread, so we don't need an acquire fence.
    atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
    return 0;
}

// Wait on a recursive or errorcheck Non-PI mutex.
static inline __always_inline int RecursiveOrErrorcheckMutexWait(pthread_mutex_internal_t* mutex,
                                                                 uint16_t shared,
                                                                 uint16_t old_state,
                                                                 bool use_realtime_clock,
                                                                 const timespec* abs_timeout) {
    // __futex_wait always waits on a 32-bit value, but state is only 16-bit. For a normal mutex,
    // the owner_tid field in the mutex is not used, and on 64-bit devices the __pad field is not
    // used either. But when a recursive or errorcheck mutex is used on 32-bit devices, we need to
    // include the owner_tid value in the value argument for __futex_wait; otherwise we may always
    // get an EAGAIN error.

#if defined(__LP64__)
    return __futex_wait_ex(&mutex->state, shared, old_state, use_realtime_clock, abs_timeout);

#else
    // This implementation works only when the layout of pthread_mutex_internal_t matches the
    // expectation below. It is also based on the assumption that Android always runs on
    // little-endian devices.
    static_assert(offsetof(pthread_mutex_internal_t, state) == 0, "");
    static_assert(offsetof(pthread_mutex_internal_t, owner_tid) == 2, "");

    uint32_t owner_tid = atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed);
    return __futex_wait_ex(&mutex->state, shared, (owner_tid << 16) | old_state,
                           use_realtime_clock, abs_timeout);
#endif
}

// Lock a Non-PI mutex.
static int MutexLockWithTimeout(pthread_mutex_internal_t* mutex, bool use_realtime_clock,
                                const timespec* abs_timeout_or_null) {
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        return NormalMutexLock(mutex, shared, use_realtime_clock, abs_timeout_or_null);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EDEADLK;
        }
        return RecursiveIncrement(mutex, old_state);
    }

    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
    const uint16_t locked_contended   = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // First, if the mutex is unlocked, try to quickly acquire it.
    // In the optimistic case where this works, set the state to locked_uncontended.
    if (old_state == unlocked) {
        // If exchanged successfully, an acquire fence is required to make
        // all memory accesses made by other threads visible to the current CPU.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                             locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
            atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
            return 0;
        }
    }

    ScopedTrace trace("Contending for pthread mutex");

    while (true) {
        if (old_state == unlocked) {
            // NOTE: We put the state to locked_contended since we _know_ there
            // is contention when we are in this loop. This ensures all waiters
            // will be unlocked.

            // If exchanged successfully, an acquire fence is required to make
            // all memory accesses made by other threads visible to the current CPU.
            if (__predict_true(atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                     &old_state, locked_contended,
                                                                     memory_order_acquire,
                                                                     memory_order_relaxed))) {
                atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
                return 0;
            }
            continue;
        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(old_state)) {
            // We should set it to locked_contended before going to sleep. This
            // makes sure waiters will be woken up eventually.

            int new_state = MUTEX_STATE_BITS_FLIP_CONTENTION(old_state);
            if (__predict_false(!atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                       &old_state, new_state,
                                                                       memory_order_relaxed,
                                                                       memory_order_relaxed))) {
                continue;
            }
            old_state = new_state;
        }

        int result = check_timespec(abs_timeout_or_null, true);
        if (result != 0) {
            return result;
        }
        // We are in locked_contended state; sleep until someone wakes us up.
        if (RecursiveOrErrorcheckMutexWait(mutex, shared, old_state, use_realtime_clock,
                                           abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
        old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    }
}

}  // namespace NonPI

static inline __always_inline bool IsMutexDestroyed(uint16_t mutex_state) {
    return mutex_state == 0xffff;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM64. So make it noinline.
static int __attribute__((noinline)) HandleUsingDestroyedMutex(pthread_mutex_t* mutex,
                                                               const char* function_name) {
    if (bionic_get_application_target_sdk_version() >= __ANDROID_API_P__) {
        __fortify_fatal("%s called on a destroyed mutex (%p)", function_name, mutex);
    }
    return EBUSY;
}

int pthread_mutex_lock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. Don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == NULL) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Avoid slowing down the fast path of the normal mutex lock operation.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        PIMutex& m = mutex->ToPIMutex();
        // Handle common case first.
        if (__predict_true(PIMutexTryLock(m) == 0)) {
            return 0;
        }
        return PIMutexTimedLock(mutex->ToPIMutex(), false, nullptr);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    return NonPI::MutexLockWithTimeout(mutex, false, nullptr);
}

int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. Don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == NULL) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        NonPI::NormalMutexUnlock(mutex, shared);
        return 0;
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexUnlock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid != atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        return EPERM;
    }

    // If the counter is > 0, we can simply decrement it atomically.
    // Other threads can mutate only the lower state bits, which never touch
    // the counter bits, so an atomic_fetch_sub is sufficient here.
    if (!MUTEX_COUNTER_BITS_IS_ZERO(old_state)) {
        // We still own the mutex, so a release fence is not needed.
        atomic_fetch_sub_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
        return 0;
    }

    // The counter is 0, so we are going to unlock the mutex by resetting its
    // state to unlocked. We need to perform an atomic_exchange in order to read
    // the current state, which will be locked_contended if there may be waiters
    // to wake.
    // A release fence is required to make previous stores visible to the next
    // lock owner thread.
    atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    old_state = atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release);
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(old_state)) {
        __futex_wake_ex(&mutex->state, shared, 1);
    }

    return 0;
}

int pthread_mutex_trylock(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        return NonPI::NormalMutexTryLock(mutex, shared);
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTryLock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EBUSY;
        }
        return NonPI::RecursiveIncrement(mutex, old_state);
    }

    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    // Same as pthread_mutex_lock, except that we don't want to wait, and
    // the only operation that can succeed is a single compare_exchange to acquire the
    // lock if it is released / not owned by anyone. No need for a complex loop.
    // If exchanged successfully, an acquire fence is required to make
    // all memory accesses made by other threads visible to the current CPU.
    old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                                                               locked_uncontended,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)
extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex_interface, unsigned ms) {
    timespec ts;
    timespec_from_ms(ts, ms);
    timespec abs_timeout;
    absolute_timespec_from_timespec(abs_timeout, ts, CLOCK_MONOTONIC);
    int error = NonPI::MutexLockWithTimeout(__get_internal_mutex(mutex_interface), false,
                                            &abs_timeout);
    if (error == ETIMEDOUT) {
        error = EBUSY;
    }
    return error;
}
#endif

static int __pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, bool use_realtime_clock,
                                     const timespec* abs_timeout, const char* function) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTimedLock(mutex->ToPIMutex(), use_realtime_clock, abs_timeout);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, function);
    }
    return NonPI::MutexLockWithTimeout(mutex, use_realtime_clock, abs_timeout);
}

int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__);
}

int pthread_mutex_timedlock_monotonic_np(pthread_mutex_t* mutex_interface,
                                         const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__);
}

int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    if (old_state == PI_MUTEX_STATE) {
        int result = PIMutexDestroy(mutex->ToPIMutex());
        if (result == 0) {
            mutex->FreePIMutex();
            atomic_store(&mutex->state, 0xffff);
        }
        return result;
    }
    // Store 0xffff to make the mutex unusable. Although the POSIX standard says it is undefined
    // behavior to destroy a locked mutex, we prefer not to change mutex->state in that situation.
    if (MUTEX_STATE_BITS_IS_UNLOCKED(old_state) &&
        atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, 0xffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}