/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <pthread.h>

#include <errno.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <unistd.h>

#include "pthread_internal.h"

#include "private/bionic_constants.h"
#include "private/bionic_fortify.h"
#include "private/bionic_futex.h"
#include "private/bionic_systrace.h"
#include "private/bionic_time_conversions.h"
#include "private/bionic_tls.h"

/* A mutex attribute holds the following fields:
 *
 * bits:  name      description
 * 0-3    type      type of mutex
 * 4      shared    process-shared flag
 * 5      protocol  whether it is a priority inheritance mutex
 */
#define MUTEXATTR_TYPE_MASK      0x000f
#define MUTEXATTR_SHARED_MASK    0x0010
#define MUTEXATTR_PROTOCOL_MASK  0x0020

#define MUTEXATTR_PROTOCOL_SHIFT 5

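// As an illustration of the layout above (values follow from this file's
// constants, assuming bionic's PTHREAD_MUTEX_RECURSIVE == 1 and
// PTHREAD_PRIO_INHERIT == 1): a recursive, process-shared, priority
// inheritance attribute packs as
//   type     = 1      -> 0x0001
//   shared   = 1 << 4 -> 0x0010
//   protocol = 1 << 5 -> 0x0020
//   *attr    = 0x0031
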
int pthread_mutexattr_init(pthread_mutexattr_t* attr)
{
    *attr = PTHREAD_MUTEX_DEFAULT;
    return 0;
}

int pthread_mutexattr_destroy(pthread_mutexattr_t* attr)
{
    *attr = -1;
    return 0;
}

int pthread_mutexattr_gettype(const pthread_mutexattr_t* attr, int* type_p)
{
    int type = (*attr & MUTEXATTR_TYPE_MASK);

    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *type_p = type;
    return 0;
}

int pthread_mutexattr_settype(pthread_mutexattr_t* attr, int type)
{
    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *attr = (*attr & ~MUTEXATTR_TYPE_MASK) | type;
    return 0;
}

/* Process-shared mutexes are not fully supported at the moment. */

int pthread_mutexattr_setpshared(pthread_mutexattr_t* attr, int pshared)
{
    switch (pshared) {
    case PTHREAD_PROCESS_PRIVATE:
        *attr &= ~MUTEXATTR_SHARED_MASK;
        return 0;

    case PTHREAD_PROCESS_SHARED:
        /* Our current implementation of pthread actually supports shared
         * mutexes but won't clean up if a process dies with the mutex held.
         * Nevertheless, it's better than nothing. Shared mutexes are used
         * by surfaceflinger and audioflinger.
         */
        *attr |= MUTEXATTR_SHARED_MASK;
        return 0;
    }
    return EINVAL;
}

int pthread_mutexattr_getpshared(const pthread_mutexattr_t* attr, int* pshared) {
    *pshared = (*attr & MUTEXATTR_SHARED_MASK) ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE;
    return 0;
}

int pthread_mutexattr_setprotocol(pthread_mutexattr_t* attr, int protocol) {
    if (protocol != PTHREAD_PRIO_NONE && protocol != PTHREAD_PRIO_INHERIT) {
        return EINVAL;
    }
    *attr = (*attr & ~MUTEXATTR_PROTOCOL_MASK) | (protocol << MUTEXATTR_PROTOCOL_SHIFT);
    return 0;
}

int pthread_mutexattr_getprotocol(const pthread_mutexattr_t* attr, int* protocol) {
    *protocol = (*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT;
    return 0;
}
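
// Illustrative (hypothetical) caller code for the attribute API above; not
// part of bionic itself. A typical way to request an errorcheck PI mutex:
//
//   pthread_mutexattr_t attr;
//   pthread_mutexattr_init(&attr);
//   pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
//   pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
//   pthread_mutex_t mutex;
//   pthread_mutex_init(&mutex, &attr);
//   pthread_mutexattr_destroy(&attr);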

// Priority Inheritance mutex implementation
struct PIMutex {
    // mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck), constant during lifetime
    uint8_t type;
    // process-shared flag, constant during lifetime
    bool shared;
    // <number of times a thread holding a recursive PI mutex> - 1
    uint16_t counter;
    // owner_tid is read/written by both userspace code and kernel code. It includes three fields:
    // FUTEX_WAITERS, FUTEX_OWNER_DIED and FUTEX_TID_MASK.
    atomic_int owner_tid;
};
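
// A sketch of how owner_tid decomposes, assuming the kernel's PI-futex
// constants (FUTEX_WAITERS 0x80000000, FUTEX_OWNER_DIED 0x40000000,
// FUTEX_TID_MASK 0x3fffffff): a mutex owned by tid 1234 with waiters queued
// in the kernel holds
//   owner_tid = FUTEX_WAITERS | 1234 = 0x800004d2
// and (owner_tid & FUTEX_TID_MASK) recovers the owner's tid.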

static inline __always_inline int PIMutexTryLock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    // Handle common case first.
    int old_owner = 0;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                               &old_owner, tid,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        return 0;
    }
    if (tid == (old_owner & FUTEX_TID_MASK)) {
        // We already own this mutex.
        if (mutex.type == PTHREAD_MUTEX_NORMAL) {
            return EBUSY;
        }
        if (mutex.type == PTHREAD_MUTEX_ERRORCHECK) {
            return EDEADLK;
        }
        if (mutex.counter == 0xffff) {
            return EAGAIN;
        }
        mutex.counter++;
        return 0;
    }
    return EBUSY;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM/ARM64, which can add up to 20 percent overhead. So make it noinline.
static int __attribute__((noinline)) PIMutexTimedLock(PIMutex& mutex,
                                                      bool use_realtime_clock,
                                                      const timespec* abs_timeout) {
    int ret = PIMutexTryLock(mutex);
    if (__predict_true(ret == 0)) {
        return 0;
    }
    if (ret == EBUSY) {
        ScopedTrace trace("Contending for pthread mutex");
        ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, use_realtime_clock, abs_timeout);
    }
    return ret;
}

static int PIMutexUnlock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    int old_owner = tid;
    // Handle common case first.
    if (__predict_true(mutex.type == PTHREAD_MUTEX_NORMAL)) {
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    } else {
        old_owner = atomic_load_explicit(&mutex.owner_tid, memory_order_relaxed);
    }

    if (tid != (old_owner & FUTEX_TID_MASK)) {
        // The mutex can only be unlocked by the thread that owns it.
        return EPERM;
    }
    if (mutex.type == PTHREAD_MUTEX_RECURSIVE) {
        if (mutex.counter != 0u) {
            --mutex.counter;
            return 0;
        }
    }
    if (old_owner == tid) {
        // No thread is waiting.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    }
    return -__futex_pi_unlock(&mutex.owner_tid, mutex.shared);
}

static int PIMutexDestroy(PIMutex& mutex) {
    // The mutex should be in unlocked state (owner_tid == 0) when destroyed.
    // Store 0xffffffff to make the mutex unusable.
    int old_owner = 0;
    if (atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0xffffffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)

namespace PIMutexAllocator {
// pthread_mutex_t has only 4 bytes in 32-bit programs, which is not enough to hold a PIMutex.
// So we use malloc to allocate PIMutexes and use 16 bits of pthread_mutex_t as an index to find
// the allocated PIMutex. This allows at most 65536 PI mutexes.
// When calling operations like pthread_mutex_lock/unlock, the 16-bit index is mapped to the
// corresponding PIMutex. To make the map operation fast, we use a lock-free mapping method:
// once a PIMutex is allocated, the data used to map an index to that PIMutex isn't changed until
// it is destroyed.
// Below are the data structures:
//   // struct Node contains a PIMutex.
//   typedef Node NodeArray[256];
//   typedef NodeArray* NodeArrayP;
//   NodeArrayP nodes[256];
//
// A 16-bit index is mapped to a Node as below:
//   (*nodes[index >> 8])[index & 0xff]
//
// We also use a free list to allow O(1) recycling of freed PIMutexes.
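//
// For example (a worked instance of the mapping above, not extra machinery):
// id 0x1234 selects the NodeArray at nodes[0x12] and the Node at element 0x34
// inside it, i.e. (*nodes[0x12])[0x34]. Neither pointer changes while the
// mutex is alive, so readers need no lock.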

union Node {
    PIMutex mutex;
    int next_free_id;  // If not -1, refers to the next node in the free PIMutex list.
};
typedef Node NodeArray[256];
typedef NodeArray* NodeArrayP;

// lock protects the items below.
static Lock lock;
static NodeArrayP* nodes;
static int next_to_alloc_id;
static int first_free_id = -1;  // If not -1, refers to the first node in the free PIMutex list.

static inline __always_inline Node& IdToNode(int id) {
    return (*nodes[id >> 8])[id & 0xff];
}

static inline __always_inline PIMutex& IdToPIMutex(int id) {
    return IdToNode(id).mutex;
}

static int AllocIdLocked() {
    if (first_free_id != -1) {
        int result = first_free_id;
        first_free_id = IdToNode(result).next_free_id;
        return result;
    }
    if (next_to_alloc_id >= 0x10000) {
        return -1;
    }
    int array_pos = next_to_alloc_id >> 8;
    int node_pos = next_to_alloc_id & 0xff;
    if (node_pos == 0) {
        if (array_pos == 0) {
            nodes = static_cast<NodeArray**>(calloc(256, sizeof(NodeArray*)));
            if (nodes == nullptr) {
                return -1;
            }
        }
        nodes[array_pos] = static_cast<NodeArray*>(malloc(sizeof(NodeArray)));
        if (nodes[array_pos] == nullptr) {
            return -1;
        }
    }
    return next_to_alloc_id++;
}

// On success, returns an id referring to a PIMutex; otherwise returns -1.
// A valid id is in the range [0, 0xffff].
static int AllocId() {
    lock.lock();
    int result = AllocIdLocked();
    lock.unlock();
    if (result != -1) {
        memset(&IdToPIMutex(result), 0, sizeof(PIMutex));
    }
    return result;
}

static void FreeId(int id) {
    lock.lock();
    IdToNode(id).next_free_id = first_free_id;
    first_free_id = id;
    lock.unlock();
}

}  // namespace PIMutexAllocator

#endif  // !defined(__LP64__)


/* Convenience macro, creates a mask of 'bits' bits that starts from
 * the 'shift'-th least significant bit in a 32-bit word.
 *
 * Examples: FIELD_MASK(0,4)  -> 0xf
 *           FIELD_MASK(16,9) -> 0x1ff0000
 */
#define FIELD_MASK(shift,bits)          (((1 << (bits))-1) << (shift))

/* This one is used to create a bit pattern from a given field value */
#define FIELD_TO_BITS(val,shift,bits)   (((val) & ((1 << (bits))-1)) << (shift))

/* And this one does the opposite, i.e. extracts a field's value from a bit pattern */
#define FIELD_FROM_BITS(val,shift,bits) (((val) >> (shift)) & ((1 << (bits))-1))

/* Convenience macros.
 *
 * These are used to form or modify the bit pattern of a given mutex value.
 */

/* Mutex state:
 *
 * 0 for unlocked
 * 1 for locked, no waiters
 * 2 for locked, maybe waiters
 */
#define MUTEX_STATE_SHIFT  0
#define MUTEX_STATE_LEN    2

#define MUTEX_STATE_MASK          FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define MUTEX_STATE_FROM_BITS(v)  FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define MUTEX_STATE_TO_BITS(v)    FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)

#define MUTEX_STATE_UNLOCKED            0  /* must be 0 to match PTHREAD_MUTEX_INITIALIZER */
#define MUTEX_STATE_LOCKED_UNCONTENDED  1  /* must be 1 due to atomic dec in unlock operation */
#define MUTEX_STATE_LOCKED_CONTENDED    2  /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */

#define MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
#define MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
#define MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)

// Return true iff the mutex is unlocked.
#define MUTEX_STATE_BITS_IS_UNLOCKED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_UNLOCKED)

// Return true iff the mutex is locked with no waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)

// Return true iff the mutex is locked with maybe waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)

/* Used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
#define MUTEX_STATE_BITS_FLIP_CONTENTION(v) ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))

/* Mutex counter:
 *
 * We need to check for overflow before incrementing, and we also need to
 * detect when the counter is 0.
 */
#define MUTEX_COUNTER_SHIFT  2
#define MUTEX_COUNTER_LEN    11
#define MUTEX_COUNTER_MASK   FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

#define MUTEX_COUNTER_BITS_WILL_OVERFLOW(v) (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
#define MUTEX_COUNTER_BITS_IS_ZERO(v)       (((v) & MUTEX_COUNTER_MASK) == 0)

/* Used to increment the counter directly after overflow has been checked */
#define MUTEX_COUNTER_BITS_ONE  FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

/* Mutex shared bit flag
 *
 * This flag is set to indicate that the mutex is shared among processes.
 * This changes the futex opcode we use for futex wait/wake operations
 * (non-shared operations are much faster).
 */
#define MUTEX_SHARED_SHIFT  13
#define MUTEX_SHARED_MASK   FIELD_MASK(MUTEX_SHARED_SHIFT, 1)

/* Mutex type:
 * We support normal, recursive and errorcheck mutexes.
 */
#define MUTEX_TYPE_SHIFT  14
#define MUTEX_TYPE_LEN    2
#define MUTEX_TYPE_MASK   FIELD_MASK(MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define MUTEX_TYPE_TO_BITS(t)  FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL)
#define MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE)
#define MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK)
// Use a special mutex type to mark priority inheritance mutexes.
#define PI_MUTEX_STATE  MUTEX_TYPE_TO_BITS(3)

// A PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   PIMutex pi_mutex;  // uint16_t pi_mutex_id in 32-bit programs
//
// state holds the following fields:
//
//   bits:  name     description
//   15-14  type     mutex type, should be 3
//   13-0   padding  should be 0
//
// pi_mutex holds the state of a PI mutex.
// pi_mutex_id holds an integer used to find the state of a PI mutex.
//
// A non-PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   atomic_int owner_tid;  // Atomic(uint16_t) in 32-bit programs
//
// state holds the following fields:
//
//   bits:  name     description
//   15-14  type     mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck)
//   13     shared   process-shared flag
//   12-2   counter  <number of times a thread holding a recursive non-PI mutex> - 1
//   1-0    state    lock state (0, 1 or 2)
//
// Bits 15-13 are constant during the lifetime of the mutex.
//
// owner_tid is used only in recursive and errorcheck non-PI mutexes to hold the mutex owner
// thread id.
//
// PI mutexes and non-PI mutexes are distinguished by checking the type field in state.
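//
// As a worked example of the non-PI state word (derived from the field layout
// above): a recursive (type 1), process-shared mutex that is locked-contended
// (lock state 2) and has been re-locked twice by its owner (counter 2) holds
//   state = (1 << 14) | (1 << 13) | (2 << 2) | 2 = 0x600a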
#if defined(__LP64__)
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    uint16_t __pad;
    union {
        atomic_int owner_tid;
        PIMutex pi_mutex;
    };
    char __reserved[28];

    PIMutex& ToPIMutex() {
        return pi_mutex;
    }

    void FreePIMutex() {
    }
} __attribute__((aligned(4)));

#else
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    union {
        _Atomic(uint16_t) owner_tid;
        uint16_t pi_mutex_id;
    };

    PIMutex& ToPIMutex() {
        return PIMutexAllocator::IdToPIMutex(pi_mutex_id);
    }

    void FreePIMutex() {
        PIMutexAllocator::FreeId(pi_mutex_id);
    }
} __attribute__((aligned(4)));
#endif

static_assert(sizeof(pthread_mutex_t) == sizeof(pthread_mutex_internal_t),
              "pthread_mutex_t should actually be pthread_mutex_internal_t in implementation.");

// For binary compatibility with old versions of pthread_mutex_t, we can't use stricter alignment
// than 4-byte alignment.
static_assert(alignof(pthread_mutex_t) == 4,
              "pthread_mutex_t should fulfill the alignment of pthread_mutex_internal_t.");

static inline pthread_mutex_internal_t* __get_internal_mutex(pthread_mutex_t* mutex_interface) {
    return reinterpret_cast<pthread_mutex_internal_t*>(mutex_interface);
}

int pthread_mutex_init(pthread_mutex_t* mutex_interface, const pthread_mutexattr_t* attr) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    memset(mutex, 0, sizeof(pthread_mutex_internal_t));

    if (__predict_true(attr == nullptr)) {
        atomic_init(&mutex->state, MUTEX_TYPE_BITS_NORMAL);
        return 0;
    }

    uint16_t state = 0;
    if ((*attr & MUTEXATTR_SHARED_MASK) != 0) {
        state |= MUTEX_SHARED_MASK;
    }

    switch (*attr & MUTEXATTR_TYPE_MASK) {
    case PTHREAD_MUTEX_NORMAL:
        state |= MUTEX_TYPE_BITS_NORMAL;
        break;
    case PTHREAD_MUTEX_RECURSIVE:
        state |= MUTEX_TYPE_BITS_RECURSIVE;
        break;
    case PTHREAD_MUTEX_ERRORCHECK:
        state |= MUTEX_TYPE_BITS_ERRORCHECK;
        break;
    default:
        return EINVAL;
    }

    if (((*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT) == PTHREAD_PRIO_INHERIT) {
#if !defined(__LP64__)
        if (state & MUTEX_SHARED_MASK) {
            return EINVAL;
        }
        int id = PIMutexAllocator::AllocId();
        if (id == -1) {
            return ENOMEM;
        }
        mutex->pi_mutex_id = id;
#endif
        atomic_init(&mutex->state, PI_MUTEX_STATE);
        PIMutex& pi_mutex = mutex->ToPIMutex();
        pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK;
        pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0;
    } else {
        atomic_init(&mutex->state, state);
        atomic_init(&mutex->owner_tid, 0);
    }
    return 0;
}
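
// Note on static initialization (an observation, not new behavior): with
// PTHREAD_MUTEX_INITIALIZER the whole object is zero, so state carries
// MUTEX_TYPE_BITS_NORMAL (0) and MUTEX_STATE_UNLOCKED (0); that is why a
// zeroed mutex behaves exactly like one initialized with attr == nullptr.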

// Namespace for non-PI mutex routines.
namespace NonPI {

static inline __always_inline int NormalMutexTryLock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    uint16_t old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                         locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
        return 0;
    }
    return EBUSY;
}

/*
 * Lock a normal non-PI mutex.
 *
 * As noted above, there are three states:
 *   0 (unlocked, no contention)
 *   1 (locked, no contention)
 *   2 (locked, contention)
 *
 * Non-recursive mutexes don't use the thread-id or counter fields, and the
 * "type" value is zero, so the only bits that will be set are the ones in
 * the lock state field.
 */
static inline __always_inline int NormalMutexLock(pthread_mutex_internal_t* mutex,
                                                  uint16_t shared,
                                                  bool use_realtime_clock,
                                                  const timespec* abs_timeout_or_null) {
    if (__predict_true(NormalMutexTryLock(mutex, shared) == 0)) {
        return 0;
    }
    int result = check_timespec(abs_timeout_or_null, true);
    if (result != 0) {
        return result;
    }

    ScopedTrace trace("Contending for pthread mutex");

    const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We want to go to sleep until the mutex is available, which requires
    // promoting it to locked_contended. We need to swap in the new state
    // and then wait until somebody wakes us up.
    // An atomic_exchange is used to compete with other threads for the lock.
    // If it returns unlocked, we have acquired the lock; otherwise another
    // thread still holds the lock and we should wait again.
    // If the lock is acquired, an acquire fence is needed to make all memory
    // accesses made by other threads visible to the current CPU.
    while (atomic_exchange_explicit(&mutex->state, locked_contended,
                                    memory_order_acquire) != unlocked) {
        if (__futex_wait_ex(&mutex->state, shared, locked_contended, use_realtime_clock,
                            abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
    }
    return 0;
}

/*
 * Release a normal non-PI mutex. The caller is responsible for determining
 * that we are in fact the owner of this lock.
 */
static inline __always_inline void NormalMutexUnlock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We use an atomic_exchange to release the lock. If the locked_contended
    // state is returned, some thread is waiting for the lock and we need to
    // wake one of them up.
    // A release fence is required to make previous stores visible to the next
    // lock owner thread.
    if (atomic_exchange_explicit(&mutex->state, unlocked,
                                 memory_order_release) == locked_contended) {
        // Wake up one waiting thread. We don't know which thread will be
        // woken or when it'll start executing -- futexes make no guarantees
        // here. There may not even be a thread waiting.
        //
        // The newly-woken thread will replace the unlocked state we just set above
        // with locked_contended state, which means that when it eventually releases
        // the mutex it will also call FUTEX_WAKE. This results in one extra wake
        // call whenever a lock is contended, but lets us avoid forgetting anyone
        // without requiring us to track the number of sleepers.
        //
        // It's possible for another thread to sneak in and grab the lock between
        // the exchange above and the wake call below. If the new thread is "slow"
        // and holds the lock for a while, we'll wake up a sleeper, which will swap
        // in locked_uncontended state and then go back to sleep since the lock is
        // still held. If the new thread is "fast", running to completion before
        // we call wake, the thread we eventually wake will find an unlocked mutex
        // and will execute. Either way we have correct behavior and nobody is
        // orphaned on the wait queue.
        __futex_wake_ex(&mutex->state, shared, 1);
    }
}

/* This common inlined function is used to increment the counter of a recursive non-PI mutex.
 *
 * If the counter would overflow, it returns EAGAIN.
 * Otherwise, it atomically increments the counter and returns 0.
 */
static inline __always_inline int RecursiveIncrement(pthread_mutex_internal_t* mutex,
                                                     uint16_t old_state) {
    // Detect recursive lock overflow and return EAGAIN.
    // This is safe because only the owner thread can modify the
    // counter bits in the mutex value.
    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(old_state)) {
        return EAGAIN;
    }

    // Other threads are able to change the lower bits (e.g. promoting it to "contended"),
    // but the mutex counter will not overflow. So we use an atomic_fetch_add operation here.
    // The mutex is already locked by the current thread, so we don't need an acquire fence.
    atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
    return 0;
}

// Wait on a recursive or errorcheck non-PI mutex.
static inline __always_inline int RecursiveOrErrorcheckMutexWait(pthread_mutex_internal_t* mutex,
                                                                 uint16_t shared,
                                                                 uint16_t old_state,
                                                                 bool use_realtime_clock,
                                                                 const timespec* abs_timeout) {
    // __futex_wait always waits on a 32-bit value, but state is 16-bit. For a normal mutex, the
    // owner_tid field in the mutex is not used. On 64-bit devices, the __pad field in the mutex
    // is not used. But when a recursive or errorcheck mutex is used on 32-bit devices, we need
    // to add the owner_tid value to the value argument for __futex_wait; otherwise we may always
    // get an EAGAIN error.

#if defined(__LP64__)
    return __futex_wait_ex(&mutex->state, shared, old_state, use_realtime_clock, abs_timeout);

#else
    // This implementation works only when the layout of pthread_mutex_internal_t matches the
    // expectation below. It also relies on the assumption that Android always runs on
    // little-endian devices.
    static_assert(offsetof(pthread_mutex_internal_t, state) == 0, "");
    static_assert(offsetof(pthread_mutex_internal_t, owner_tid) == 2, "");

    uint32_t owner_tid = atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed);
    return __futex_wait_ex(&mutex->state, shared, (owner_tid << 16) | old_state,
                           use_realtime_clock, abs_timeout);
#endif
}
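
// To illustrate the 32-bit composition above with concrete numbers (derived
// from the little-endian layout, not additional logic): with owner tid 1234
// (0x4d2) and a 16-bit state of 0x4002 (recursive, locked-contended), the
// value handed to __futex_wait_ex is
//   (0x4d2 << 16) | 0x4002 = 0x04d24002
// which is exactly the 32-bit word the kernel reads at &mutex->state.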

// Lock a non-PI mutex.
static int MutexLockWithTimeout(pthread_mutex_internal_t* mutex, bool use_realtime_clock,
                                const timespec* abs_timeout_or_null) {
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        return NormalMutexLock(mutex, shared, use_realtime_clock, abs_timeout_or_null);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EDEADLK;
        }
        return RecursiveIncrement(mutex, old_state);
    }

    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
    const uint16_t locked_contended = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // First, if the mutex is unlocked, try to quickly acquire it.
    // In the optimistic case where this works, set the state to locked_uncontended.
    if (old_state == unlocked) {
        // If exchanged successfully, an acquire fence is required to make
        // all memory accesses made by other threads visible to the current CPU.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                             locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
            atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
            return 0;
        }
    }

    ScopedTrace trace("Contending for pthread mutex");

    while (true) {
        if (old_state == unlocked) {
            // NOTE: We put the state to locked_contended since we _know_ there
            // is contention when we are in this loop. This ensures all waiters
            // will be unlocked.

            // If exchanged successfully, an acquire fence is required to make
            // all memory accesses made by other threads visible to the current CPU.
            if (__predict_true(atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                     &old_state, locked_contended,
                                                                     memory_order_acquire,
                                                                     memory_order_relaxed))) {
                atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
                return 0;
            }
            continue;
        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(old_state)) {
            // We should set it to locked_contended before going to sleep, to make
            // sure waiters will be woken up eventually.

            int new_state = MUTEX_STATE_BITS_FLIP_CONTENTION(old_state);
            if (__predict_false(!atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                       &old_state, new_state,
                                                                       memory_order_relaxed,
                                                                       memory_order_relaxed))) {
                continue;
            }
            old_state = new_state;
        }

        int result = check_timespec(abs_timeout_or_null, true);
        if (result != 0) {
            return result;
        }
        // We are in locked_contended state; sleep until someone wakes us up.
        if (RecursiveOrErrorcheckMutexWait(mutex, shared, old_state, use_realtime_clock,
                                           abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
        old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    }
}

}  // namespace NonPI

static inline __always_inline bool IsMutexDestroyed(uint16_t mutex_state) {
    return mutex_state == 0xffff;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM64. So make it noinline.
static int __attribute__((noinline)) HandleUsingDestroyedMutex(pthread_mutex_t* mutex,
                                                               const char* function_name) {
    if (android_get_application_target_sdk_version() >= __ANDROID_API_P__) {
        __fortify_fatal("%s called on a destroyed mutex (%p)", function_name, mutex);
    }
    return EBUSY;
}

int pthread_mutex_lock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. We don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == nullptr) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Avoid slowing down the fast path of the normal mutex lock operation.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        PIMutex& m = mutex->ToPIMutex();
        // Handle common case first.
        if (__predict_true(PIMutexTryLock(m) == 0)) {
            return 0;
        }
        return PIMutexTimedLock(m, false, nullptr);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    return NonPI::MutexLockWithTimeout(mutex, false, nullptr);
}

int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. We don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == nullptr) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        NonPI::NormalMutexUnlock(mutex, shared);
        return 0;
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexUnlock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid != atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        return EPERM;
    }

    // If the counter is > 0, we can simply decrement it atomically.
    // Since other threads can mutate only the lower state bits (never the
    // counter bits), an atomic_fetch_sub is enough to do it.
    if (!MUTEX_COUNTER_BITS_IS_ZERO(old_state)) {
        // We still own the mutex, so a release fence is not needed.
        atomic_fetch_sub_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
        return 0;
    }

    // The counter is 0, so we're going to unlock the mutex by resetting its
    // state to unlocked. We need to perform an atomic_exchange in order to read
    // the current state, which will be locked_contended if there may be waiters
    // to wake.
    // A release fence is required to make previous stores visible to the next
    // lock owner thread.
    atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    old_state = atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release);
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(old_state)) {
        __futex_wake_ex(&mutex->state, shared, 1);
    }

    return 0;
}

int pthread_mutex_trylock(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        return NonPI::NormalMutexTryLock(mutex, shared);
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTryLock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EBUSY;
        }
        return NonPI::RecursiveIncrement(mutex, old_state);
    }

    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    // Same as pthread_mutex_lock, except that we don't want to wait, and
    // the only operation that can succeed is a single compare_exchange to acquire the
    // lock if it is released / not owned by anyone. No need for a complex loop.
    // If exchanged successfully, an acquire fence is required to make
    // all memory accesses made by other threads visible to the current CPU.
    old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                                                               locked_uncontended,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
        return 0;
    }
    return EBUSY;
}
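
// A hypothetical caller-side pattern for pthread_mutex_trylock (illustrative
// only, not bionic code): attempt opportunistic work, fall back if busy.
//
//   if (pthread_mutex_trylock(&m) == 0) {
//     // ... critical section ...
//     pthread_mutex_unlock(&m);
//   } else {
//     // EBUSY: someone else holds m; do something else instead of blocking.
//   }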

#if !defined(__LP64__)
extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex_interface, unsigned ms) {
    timespec ts;
    timespec_from_ms(ts, ms);
    timespec abs_timeout;
    absolute_timespec_from_timespec(abs_timeout, ts, CLOCK_MONOTONIC);
    int error = NonPI::MutexLockWithTimeout(__get_internal_mutex(mutex_interface), false,
                                            &abs_timeout);
    if (error == ETIMEDOUT) {
        error = EBUSY;
    }
    return error;
}
#endif

static int __pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, bool use_realtime_clock,
                                     const timespec* abs_timeout, const char* function) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTimedLock(mutex->ToPIMutex(), use_realtime_clock, abs_timeout);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, function);
    }
    return NonPI::MutexLockWithTimeout(mutex, use_realtime_clock, abs_timeout);
}

int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__);
}

int pthread_mutex_timedlock_monotonic_np(pthread_mutex_t* mutex_interface,
                                         const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__);
}
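
// Illustrative (hypothetical) caller code for the timed-lock entry points
// above, not part of bionic: pthread_mutex_timedlock takes a CLOCK_REALTIME
// deadline, while the _monotonic_np variant takes a CLOCK_MONOTONIC one.
//
//   timespec deadline;
//   clock_gettime(CLOCK_MONOTONIC, &deadline);
//   deadline.tv_sec += 1;  // give up after roughly one second
//   int rc = pthread_mutex_timedlock_monotonic_np(&m, &deadline);
//   if (rc == ETIMEDOUT) {
//     // Could not acquire m within the deadline.
//   }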

int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    if (old_state == PI_MUTEX_STATE) {
        int result = PIMutexDestroy(mutex->ToPIMutex());
        if (result == 0) {
            mutex->FreePIMutex();
            atomic_store(&mutex->state, 0xffff);
        }
        return result;
    }
    // Store 0xffff to make the mutex unusable. Although the POSIX standard says it is undefined
    // behavior to destroy a locked mutex, we prefer not to change mutex->state in that situation.
    if (MUTEX_STATE_BITS_IS_UNLOCKED(old_state) &&
        atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, 0xffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}