/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <pthread.h>

#include <errno.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <unistd.h>

#include "pthread_internal.h"

#include "private/bionic_constants.h"
#include "private/bionic_fortify.h"
#include "private/bionic_futex.h"
#include "private/bionic_sdk_version.h"
#include "private/bionic_systrace.h"
#include "private/bionic_time_conversions.h"
#include "private/bionic_tls.h"

/* A mutex attribute holds the following fields:
 *
 * bits:     name       description
 * 0-3       type       type of mutex
 * 4         shared     process-shared flag
 * 5         protocol   whether the mutex uses priority inheritance
 */
#define  MUTEXATTR_TYPE_MASK     0x000f
#define  MUTEXATTR_SHARED_MASK   0x0010
#define  MUTEXATTR_PROTOCOL_MASK 0x0020

#define  MUTEXATTR_PROTOCOL_SHIFT 5

int pthread_mutexattr_init(pthread_mutexattr_t *attr)
{
    *attr = PTHREAD_MUTEX_DEFAULT;
    return 0;
}

int pthread_mutexattr_destroy(pthread_mutexattr_t *attr)
{
    *attr = -1;
    return 0;
}

int pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type_p)
{
    int type = (*attr & MUTEXATTR_TYPE_MASK);

    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *type_p = type;
    return 0;
}

int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type)
{
    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *attr = (*attr & ~MUTEXATTR_TYPE_MASK) | type;
    return 0;
}

/* Process-shared mutexes are not supported at the moment. */

int pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
{
    switch (pshared) {
    case PTHREAD_PROCESS_PRIVATE:
        *attr &= ~MUTEXATTR_SHARED_MASK;
        return 0;

    case PTHREAD_PROCESS_SHARED:
        /* Our current implementation of pthread actually supports shared
         * mutexes but won't clean up if a process dies with the mutex held.
         * Nevertheless, it's better than nothing. Shared mutexes are used
         * by surfaceflinger and audioflinger.
         */
        *attr |= MUTEXATTR_SHARED_MASK;
        return 0;
    }
    return EINVAL;
}

int pthread_mutexattr_getpshared(const pthread_mutexattr_t* attr, int* pshared) {
    *pshared = (*attr & MUTEXATTR_SHARED_MASK) ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE;
    return 0;
}

int pthread_mutexattr_setprotocol(pthread_mutexattr_t* attr, int protocol) {
    if (protocol != PTHREAD_PRIO_NONE && protocol != PTHREAD_PRIO_INHERIT) {
        return EINVAL;
    }
    *attr = (*attr & ~MUTEXATTR_PROTOCOL_MASK) | (protocol << MUTEXATTR_PROTOCOL_SHIFT);
    return 0;
}

int pthread_mutexattr_getprotocol(const pthread_mutexattr_t* attr, int* protocol) {
    *protocol = (*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT;
    return 0;
}
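// A minimal usage sketch (illustrative only, not part of this file): building
// a process-private, error-checking, priority-inheritance mutex with the
// attribute functions above. All calls are standard POSIX / bionic APIs.
//
//   pthread_mutexattr_t attr;
//   pthread_mutexattr_init(&attr);
//   pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
//   pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
//   pthread_mutex_t mutex;
//   pthread_mutex_init(&mutex, &attr);
//   pthread_mutexattr_destroy(&attr);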

// Priority Inheritance mutex implementation.
struct PIMutex {
  // Mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck); constant during lifetime.
  uint8_t type;
  // Process-shared flag; constant during lifetime.
  bool shared;
  // <number of times a thread holding a recursive PI mutex> - 1.
  uint16_t counter;
  // owner_tid is read/written by both userspace code and kernel code. It includes three fields:
  // FUTEX_WAITERS (bit 31), FUTEX_OWNER_DIED (bit 30) and FUTEX_TID_MASK (bits 0-29).
  atomic_int owner_tid;
};

static inline __always_inline int PIMutexTryLock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    // Handle the common case first.
    int old_owner = 0;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                               &old_owner, tid,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        return 0;
    }
    if (tid == (old_owner & FUTEX_TID_MASK)) {
        // We already own this mutex.
        if (mutex.type == PTHREAD_MUTEX_NORMAL) {
            return EBUSY;
        }
        if (mutex.type == PTHREAD_MUTEX_ERRORCHECK) {
            return EDEADLK;
        }
        if (mutex.counter == 0xffff) {
            return EAGAIN;
        }
        mutex.counter++;
        return 0;
    }
    return EBUSY;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM/ARM64, which adds up to 20% overhead. So mark it noinline.
static int __attribute__((noinline)) PIMutexTimedLock(PIMutex& mutex,
                                                      bool use_realtime_clock,
                                                      const timespec* abs_timeout) {
    int ret = PIMutexTryLock(mutex);
    if (__predict_true(ret == 0)) {
        return 0;
    }
    if (ret == EBUSY) {
        ScopedTrace trace("Contending for pthread mutex");
        ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, use_realtime_clock, abs_timeout);
    }
    return ret;
}

static int PIMutexUnlock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    int old_owner = tid;
    // Handle the common case first.
    if (__predict_true(mutex.type == PTHREAD_MUTEX_NORMAL)) {
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    }

    if (tid != (old_owner & FUTEX_TID_MASK)) {
        // The mutex can only be unlocked by the thread that owns it.
        return EPERM;
    }
    if (mutex.type == PTHREAD_MUTEX_RECURSIVE) {
        if (mutex.counter != 0u) {
            --mutex.counter;
            return 0;
        }
    }
    if (old_owner == tid) {
        // No thread is waiting.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    }
    return -__futex_pi_unlock(&mutex.owner_tid, mutex.shared);
}

static int PIMutexDestroy(PIMutex& mutex) {
    // The mutex should be in the unlocked state (owner_tid == 0) when destroyed.
    // Store 0xffffffff to make the mutex unusable.
    int old_owner = 0;
    if (atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0xffffffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)

namespace PIMutexAllocator {
// pthread_mutex_t has only 4 bytes in 32-bit programs, which is not enough to hold a PIMutex.
// So we use malloc to allocate PIMutexes and use a 16-bit field of pthread_mutex_t as an index
// to find the allocated PIMutex. This allows at most 65536 PI mutexes.
// When calling operations like pthread_mutex_lock/unlock, the 16-bit index is mapped to the
// corresponding PIMutex. To make the map operation fast, we use a lockless mapping method:
//   Once a PIMutex is allocated, none of the data used to map its index to the PIMutex changes
//   until it is destroyed.
// Below are the data structures:
//   // struct Node contains a PIMutex.
//   typedef Node NodeArray[256];
//   typedef NodeArray* NodeArrayP;
//   NodeArrayP nodes[256];
//
// A 16-bit index is mapped to a Node as below:
//   (*nodes[index >> 8])[index & 0xff]
//
// A free list allows finding recycled PIMutexes in O(1) time.

union Node {
    PIMutex mutex;
    int next_free_id;  // If not -1, refers to the next node in the free PIMutex list.
};
typedef Node NodeArray[256];
typedef NodeArray* NodeArrayP;

// lock protects the fields below.
static Lock lock;
static NodeArrayP* nodes;
static int next_to_alloc_id;
static int first_free_id = -1;  // If not -1, refers to the first node in the free PIMutex list.

static inline __always_inline Node& IdToNode(int id) {
    return (*nodes[id >> 8])[id & 0xff];
}

static inline __always_inline PIMutex& IdToPIMutex(int id) {
    return IdToNode(id).mutex;
}

static int AllocIdLocked() {
    if (first_free_id != -1) {
        int result = first_free_id;
        first_free_id = IdToNode(result).next_free_id;
        return result;
    }
    if (next_to_alloc_id >= 0x10000) {
        return -1;
    }
    int array_pos = next_to_alloc_id >> 8;
    int node_pos = next_to_alloc_id & 0xff;
    if (node_pos == 0) {
        if (array_pos == 0) {
            nodes = static_cast<NodeArray**>(calloc(256, sizeof(NodeArray*)));
            if (nodes == nullptr) {
                return -1;
            }
        }
        nodes[array_pos] = static_cast<NodeArray*>(malloc(sizeof(NodeArray)));
        if (nodes[array_pos] == nullptr) {
            return -1;
        }
    }
    return next_to_alloc_id++;
}

// On success, return an id referring to a PIMutex; otherwise return -1.
// A valid id is in the range [0, 0xffff].
static int AllocId() {
    lock.lock();
    int result = AllocIdLocked();
    lock.unlock();
    if (result != -1) {
        memset(&IdToPIMutex(result), 0, sizeof(PIMutex));
    }
    return result;
}

static void FreeId(int id) {
    lock.lock();
    IdToNode(id).next_free_id = first_free_id;
    first_free_id = id;
    lock.unlock();
}

}  // namespace PIMutexAllocator

#endif  // !defined(__LP64__)


/* Convenience macro: creates a mask of 'bits' bits that starts from
 * the 'shift'-th least significant bit in a 32-bit word.
 *
 * Examples: FIELD_MASK(0,4)  -> 0xf
 *           FIELD_MASK(16,9) -> 0x1ff0000
 */
#define  FIELD_MASK(shift,bits)           (((1 << (bits))-1) << (shift))

/* This one is used to create a bit pattern from a given field value. */
#define  FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits))-1)) << (shift))

/* And this one does the opposite, i.e. extracts a field's value from a bit pattern. */
#define  FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))
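/* A quick worked example (illustrative, using the counter field defined
 * below with shift 2 and length 11):
 *
 *   FIELD_MASK(2,11)            -> 0x1ffc
 *   FIELD_TO_BITS(5,2,11)       -> 0x14   (5 stored in bits 12-2)
 *   FIELD_FROM_BITS(0x14,2,11)  -> 5
 */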

/* Convenience macros.
 *
 * These are used to form or modify the bit pattern of a given mutex value.
 */

/* Mutex state:
 *
 * 0 for unlocked
 * 1 for locked, no waiters
 * 2 for locked, maybe waiters
 */
#define  MUTEX_STATE_SHIFT      0
#define  MUTEX_STATE_LEN        2

#define  MUTEX_STATE_MASK           FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define  MUTEX_STATE_FROM_BITS(v)   FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define  MUTEX_STATE_TO_BITS(v)     FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)

#define  MUTEX_STATE_UNLOCKED            0   /* must be 0 to match PTHREAD_MUTEX_INITIALIZER */
#define  MUTEX_STATE_LOCKED_UNCONTENDED  1   /* must be 1 due to atomic dec in unlock operation */
#define  MUTEX_STATE_LOCKED_CONTENDED    2   /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */

#define  MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
#define  MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
#define  MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)

// Return true iff the mutex is unlocked.
#define MUTEX_STATE_BITS_IS_UNLOCKED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_UNLOCKED)

// Return true iff the mutex is locked with no waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)

// Return true iff the mutex is locked, possibly with waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)   (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)

/* Used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
#define  MUTEX_STATE_BITS_FLIP_CONTENTION(v)      ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))

/* Mutex counter:
 *
 * We need to check for overflow before incrementing, and we also need to
 * detect when the counter is 0.
 */
#define  MUTEX_COUNTER_SHIFT         2
#define  MUTEX_COUNTER_LEN           11
#define  MUTEX_COUNTER_MASK          FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

#define  MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)    (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
#define  MUTEX_COUNTER_BITS_IS_ZERO(v)          (((v) & MUTEX_COUNTER_MASK) == 0)

/* Used to increment the counter directly after overflow has been checked */
#define  MUTEX_COUNTER_BITS_ONE      FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

/* Mutex shared bit flag
 *
 * This flag is set to indicate that the mutex is shared among processes.
 * This changes the futex opcode we use for futex wait/wake operations
 * (non-shared operations are much faster).
 */
#define  MUTEX_SHARED_SHIFT    13
#define  MUTEX_SHARED_MASK     FIELD_MASK(MUTEX_SHARED_SHIFT, 1)

/* Mutex type:
 * We support normal, recursive and errorcheck mutexes.
 */
#define  MUTEX_TYPE_SHIFT      14
#define  MUTEX_TYPE_LEN        2
#define  MUTEX_TYPE_MASK       FIELD_MASK(MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_TO_BITS(t)       FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL)
#define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE)
#define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK)
// Use a special mutex type to mark priority-inheritance mutexes.
#define  PI_MUTEX_STATE     MUTEX_TYPE_TO_BITS(3)

// A PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   PIMutex pi_mutex;  // uint16_t pi_mutex_id in 32-bit programs
//
//   state holds the following fields:
//
//   bits:   name    description
//   15-14   type    mutex type, should be 3
//   13-0    padding should be 0
//
//   pi_mutex holds the state of a PI mutex.
//   pi_mutex_id holds an integer used to find the state of a PI mutex.
//
// A Non-PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   atomic_int owner_tid;  // Atomic(uint16_t) in 32-bit programs
//
//   state holds the following fields:
//
//   bits:     name     description
//   15-14     type     mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck)
//   13        shared   process-shared flag
//   12-2      counter  <number of times a thread holding a recursive Non-PI mutex> - 1
//   1-0       state    lock state (0, 1 or 2)
//
//   bits 15-13 are constant during the lifetime of the mutex.
//
//   owner_tid is used only in recursive and errorcheck Non-PI mutexes to hold the mutex owner
//   thread id.
//
// PI mutexes and Non-PI mutexes are distinguished by checking the type field in state.
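//
// A worked example (illustrative values): for a process-shared recursive
// Non-PI mutex, locked contended and re-entered five times, the 16-bit state
// decodes as
//   type    = 1 (recursive)  -> 1 << 14 = 0x4000
//   shared  = 1              -> 1 << 13 = 0x2000
//   counter = 5              -> 5 << 2  = 0x0014
//   state   = 2 (contended)  ->            0x0002
// giving a combined state value of 0x6016.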
#if defined(__LP64__)
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    uint16_t __pad;
    union {
        atomic_int owner_tid;
        PIMutex pi_mutex;
    };
    char __reserved[28];

    PIMutex& ToPIMutex() {
        return pi_mutex;
    }

    void FreePIMutex() {
    }
} __attribute__((aligned(4)));

#else
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    union {
        _Atomic(uint16_t) owner_tid;
        uint16_t pi_mutex_id;
    };

    PIMutex& ToPIMutex() {
        return PIMutexAllocator::IdToPIMutex(pi_mutex_id);
    }

    void FreePIMutex() {
        PIMutexAllocator::FreeId(pi_mutex_id);
    }
} __attribute__((aligned(4)));
#endif

static_assert(sizeof(pthread_mutex_t) == sizeof(pthread_mutex_internal_t),
              "pthread_mutex_t should actually be pthread_mutex_internal_t in implementation.");

// For binary compatibility with old versions of pthread_mutex_t, we can't use stricter alignment
// than 4-byte alignment.
static_assert(alignof(pthread_mutex_t) == 4,
              "pthread_mutex_t should fulfill the alignment of pthread_mutex_internal_t.");

static inline pthread_mutex_internal_t* __get_internal_mutex(pthread_mutex_t* mutex_interface) {
  return reinterpret_cast<pthread_mutex_internal_t*>(mutex_interface);
}

int pthread_mutex_init(pthread_mutex_t* mutex_interface, const pthread_mutexattr_t* attr) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    memset(mutex, 0, sizeof(pthread_mutex_internal_t));

    if (__predict_true(attr == NULL)) {
        atomic_init(&mutex->state, MUTEX_TYPE_BITS_NORMAL);
        return 0;
    }

    uint16_t state = 0;
    if ((*attr & MUTEXATTR_SHARED_MASK) != 0) {
        state |= MUTEX_SHARED_MASK;
    }

    switch (*attr & MUTEXATTR_TYPE_MASK) {
    case PTHREAD_MUTEX_NORMAL:
      state |= MUTEX_TYPE_BITS_NORMAL;
      break;
    case PTHREAD_MUTEX_RECURSIVE:
      state |= MUTEX_TYPE_BITS_RECURSIVE;
      break;
    case PTHREAD_MUTEX_ERRORCHECK:
      state |= MUTEX_TYPE_BITS_ERRORCHECK;
      break;
    default:
        return EINVAL;
    }

    if (((*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT) == PTHREAD_PRIO_INHERIT) {
#if !defined(__LP64__)
        if (state & MUTEX_SHARED_MASK) {
            return EINVAL;
        }
        int id = PIMutexAllocator::AllocId();
        if (id == -1) {
            return ENOMEM;
        }
        mutex->pi_mutex_id = id;
#endif
        atomic_init(&mutex->state, PI_MUTEX_STATE);
        PIMutex& pi_mutex = mutex->ToPIMutex();
        pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK;
        pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0;
    } else {
        atomic_init(&mutex->state, state);
        atomic_init(&mutex->owner_tid, 0);
    }
    return 0;
}

// Namespace for Non-PI mutex routines.
namespace NonPI {

static inline __always_inline int NormalMutexTryLock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    uint16_t old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                         locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
        return 0;
    }
    return EBUSY;
}

/*
 * Lock a normal Non-PI mutex.
 *
 * As noted above, there are three states:
 *   0 (unlocked, no contention)
 *   1 (locked, no contention)
 *   2 (locked, contention)
 *
 * Non-recursive mutexes don't use the thread-id or counter fields, and the
 * "type" value is zero, so the only bits that will be set are the ones in
 * the lock state field.
 */
static inline __always_inline int NormalMutexLock(pthread_mutex_internal_t* mutex,
                                                  uint16_t shared,
                                                  bool use_realtime_clock,
                                                  const timespec* abs_timeout_or_null) {
    if (__predict_true(NormalMutexTryLock(mutex, shared) == 0)) {
        return 0;
    }
    int result = check_timespec(abs_timeout_or_null, true);
    if (result != 0) {
        return result;
    }

    ScopedTrace trace("Contending for pthread mutex");

    const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We want to go to sleep until the mutex is available, which requires
    // promoting it to locked_contended. We need to swap in the new state
    // and then wait until somebody wakes us up.
    // An atomic_exchange is used to compete with other threads for the lock.
    // If it returns unlocked, we have acquired the lock; otherwise another
    // thread still holds the lock and we should wait again.
    // If the lock is acquired, an acquire fence is needed to make all memory
    // accesses made by other threads visible to the current CPU.
    while (atomic_exchange_explicit(&mutex->state, locked_contended,
                                    memory_order_acquire) != unlocked) {
        if (__futex_wait_ex(&mutex->state, shared, locked_contended, use_realtime_clock,
                            abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
    }
    return 0;
}

/*
 * Release a normal Non-PI mutex.  The caller is responsible for determining
 * that we are in fact the owner of this lock.
 */
static inline __always_inline void NormalMutexUnlock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We use an atomic_exchange to release the lock. If the locked_contended
    // state is returned, some thread is waiting for the lock and we need to
    // wake one of them up.
    // A release fence is required to make previous stores visible to the
    // next lock owner.
    if (atomic_exchange_explicit(&mutex->state, unlocked,
                                 memory_order_release) == locked_contended) {
        // Wake up one waiting thread. We don't know which thread will be
        // woken or when it'll start executing -- futexes make no guarantees
        // here. There may not even be a thread waiting.
        //
        // The newly-woken thread will replace the unlocked state we just set above
        // with locked_contended state, which means that when it eventually releases
        // the mutex it will also call FUTEX_WAKE. This results in one extra wake
        // call whenever a lock is contended, but lets us avoid forgetting anyone
        // without requiring us to track the number of sleepers.
        //
        // It's possible for another thread to sneak in and grab the lock between
        // the exchange above and the wake call below. If the new thread is "slow"
        // and holds the lock for a while, we'll wake up a sleeper, which will swap
        // in locked_uncontended state and then go back to sleep since the lock is
        // still held. If the new thread is "fast", running to completion before
        // we call wake, the thread we eventually wake will find an unlocked mutex
        // and will execute. Either way we have correct behavior and nobody is
        // orphaned on the wait queue.
        __futex_wake_ex(&mutex->state, shared, 1);
    }
}

/* This common inlined function is used to increment the counter of a recursive Non-PI mutex.
 *
 * If the counter would overflow, it returns EAGAIN.
 * Otherwise, it atomically increments the counter and returns 0.
 */
static inline __always_inline int RecursiveIncrement(pthread_mutex_internal_t* mutex,
                                                     uint16_t old_state) {
    // Detect recursive lock overflow and return EAGAIN.
    // This is safe because only the owner thread can modify the
    // counter bits in the mutex value.
    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(old_state)) {
        return EAGAIN;
    }

    // Other threads are able to change the lower bits (e.g. promoting it to "contended"),
    // but the mutex counter will not overflow. So we use an atomic_fetch_add operation here.
    // The mutex is already locked by the current thread, so we don't need an acquire fence.
    atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
    return 0;
}

// Wait on a recursive or errorcheck Non-PI mutex.
static inline __always_inline int RecursiveOrErrorcheckMutexWait(pthread_mutex_internal_t* mutex,
                                                                 uint16_t shared,
                                                                 uint16_t old_state,
                                                                 bool use_realtime_clock,
                                                                 const timespec* abs_timeout) {
// __futex_wait always waits on a 32-bit value, but state is only 16 bits. For a normal mutex,
// the owner_tid field in the mutex is not used. On 64-bit devices, the __pad field in the mutex
// is not used. But when a recursive or errorcheck mutex is used on 32-bit devices, we need to
// include the owner_tid value in the expected-value argument for __futex_wait; otherwise we may
// always get an EAGAIN error.

#if defined(__LP64__)
  return __futex_wait_ex(&mutex->state, shared, old_state, use_realtime_clock, abs_timeout);

#else
  // This implementation works only when the layout of pthread_mutex_internal_t matches the
  // expectations below. It also relies on Android devices always being little-endian.
  static_assert(offsetof(pthread_mutex_internal_t, state) == 0, "");
  static_assert(offsetof(pthread_mutex_internal_t, owner_tid) == 2, "");

  uint32_t owner_tid = atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed);
  return __futex_wait_ex(&mutex->state, shared, (owner_tid << 16) | old_state,
                         use_realtime_clock, abs_timeout);
#endif
}
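// A worked example of the 32-bit composition above (illustrative values): with
// owner_tid = 0x04d2 at offset 2 and a recursive, contended state = 0x4002 at
// offset 0, a little-endian 32-bit load from &state yields
//   (owner_tid << 16) | old_state = 0x04d24002,
// which is exactly the value the kernel compares against during the futex wait.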

// Lock a Non-PI mutex.
static int MutexLockWithTimeout(pthread_mutex_internal_t* mutex, bool use_realtime_clock,
                                const timespec* abs_timeout_or_null) {
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle the common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        return NormalMutexLock(mutex, shared, use_realtime_clock, abs_timeout_or_null);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EDEADLK;
        }
        return RecursiveIncrement(mutex, old_state);
    }

    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
    const uint16_t locked_contended   = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // First, if the mutex is unlocked, try to quickly acquire it.
    // In the optimistic case where this works, set the state to locked_uncontended.
    if (old_state == unlocked) {
        // If exchanged successfully, an acquire fence is required to make
        // all memory accesses made by other threads visible to the current CPU.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                             locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
            atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
            return 0;
        }
    }

    ScopedTrace trace("Contending for pthread mutex");

    while (true) {
        if (old_state == unlocked) {
            // NOTE: We set the state to locked_contended since we _know_ there
            // is contention when we are in this loop. This ensures all waiters
            // will eventually be woken.

            // If exchanged successfully, an acquire fence is required to make
            // all memory accesses made by other threads visible to the current CPU.
            if (__predict_true(atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                     &old_state, locked_contended,
                                                                     memory_order_acquire,
                                                                     memory_order_relaxed))) {
                atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
                return 0;
            }
            continue;
        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(old_state)) {
            // We should set the state to locked_contended before going to sleep,
            // to make sure waiters will be woken up eventually.

            int new_state = MUTEX_STATE_BITS_FLIP_CONTENTION(old_state);
            if (__predict_false(!atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                       &old_state, new_state,
                                                                       memory_order_relaxed,
                                                                       memory_order_relaxed))) {
                continue;
            }
            old_state = new_state;
        }

        int result = check_timespec(abs_timeout_or_null, true);
        if (result != 0) {
            return result;
        }
        // We are in the locked_contended state; sleep until someone wakes us up.
        if (RecursiveOrErrorcheckMutexWait(mutex, shared, old_state, use_realtime_clock,
                                           abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
        old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    }
}

}  // namespace NonPI

static inline __always_inline bool IsMutexDestroyed(uint16_t mutex_state) {
    return mutex_state == 0xffff;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM64. So mark it noinline.
static int __attribute__((noinline)) HandleUsingDestroyedMutex(pthread_mutex_t* mutex,
                                                               const char* function_name) {
    if (bionic_get_application_target_sdk_version() >= __ANDROID_API_P__) {
        __fortify_fatal("%s called on a destroyed mutex (%p)", function_name, mutex);
    }
    return EBUSY;
}

int pthread_mutex_lock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. Don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == NULL) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Avoid slowing down the fast path of the normal mutex lock operation.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        PIMutex& m = mutex->ToPIMutex();
        // Handle the common case first.
        if (__predict_true(PIMutexTryLock(m) == 0)) {
            return 0;
        }
        return PIMutexTimedLock(mutex->ToPIMutex(), false, nullptr);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    return NonPI::MutexLockWithTimeout(mutex, false, nullptr);
}

int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. Don't need to worry about it for LP64 since the ABI is brand new,
    // but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == NULL) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype  = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle the common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        NonPI::NormalMutexUnlock(mutex, shared);
        return 0;
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexUnlock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid != atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        return EPERM;
    }

    // If the counter is > 0, we can simply decrement it atomically.
    // Only the owner thread can change the counter bits, and other threads can
    // only mutate the lower state bits, so a relaxed atomic_fetch_sub is enough.
    if (!MUTEX_COUNTER_BITS_IS_ZERO(old_state)) {
        // We still own the mutex, so a release fence is not needed.
        atomic_fetch_sub_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
        return 0;
    }

    // The counter is 0, so we're going to unlock the mutex by resetting its
    // state to unlocked. We need to perform an atomic_exchange in order to read
    // the current state, which will be locked_contended if there may be waiters
    // to wake.
    // A release fence is required to make previous stores visible to the next
    // lock owner.
    atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    old_state = atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release);
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(old_state)) {
        __futex_wake_ex(&mutex->state, shared, 1);
    }

    return 0;
}

int pthread_mutex_trylock(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype  = (old_state & MUTEX_TYPE_MASK);

    // Handle the common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        return NonPI::NormalMutexTryLock(mutex, shared);
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTryLock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EBUSY;
        }
        return NonPI::RecursiveIncrement(mutex, old_state);
    }

    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    // Same as pthread_mutex_lock, except that we don't want to wait, and
    // the only operation that can succeed is a single compare_exchange to acquire the
    // lock if it is released / not owned by anyone. No need for a complex loop.
    // If exchanged successfully, an acquire fence is required to make
    // all memory accesses made by other threads visible to the current CPU.
    old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                                                               locked_uncontended,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)
extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex_interface, unsigned ms) {
    timespec ts;
    timespec_from_ms(ts, ms);
    timespec abs_timeout;
    absolute_timespec_from_timespec(abs_timeout, ts, CLOCK_MONOTONIC);
    int error = NonPI::MutexLockWithTimeout(__get_internal_mutex(mutex_interface), false,
                                            &abs_timeout);
    if (error == ETIMEDOUT) {
        error = EBUSY;
    }
    return error;
}
#endif

static int __pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, bool use_realtime_clock,
                                     const timespec* abs_timeout, const char* function) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Handle the common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTimedLock(mutex->ToPIMutex(), use_realtime_clock, abs_timeout);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, function);
    }
    return NonPI::MutexLockWithTimeout(mutex, use_realtime_clock, abs_timeout);
}

int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__);
}

int pthread_mutex_timedlock_monotonic_np(pthread_mutex_t* mutex_interface,
                                         const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__);
}
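// A minimal usage sketch (illustrative only): waiting up to one second for a
// mutex using the monotonic clock, which is immune to wall-clock adjustments.
//
//   timespec ts;
//   clock_gettime(CLOCK_MONOTONIC, &ts);
//   ts.tv_sec += 1;
//   if (pthread_mutex_timedlock_monotonic_np(&mutex, &ts) == ETIMEDOUT) {
//     // Timed out; the lock is still held elsewhere.
//   }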

int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    if (old_state == PI_MUTEX_STATE) {
        int result = PIMutexDestroy(mutex->ToPIMutex());
        if (result == 0) {
            mutex->FreePIMutex();
            atomic_store(&mutex->state, 0xffff);
        }
        return result;
    }
    // Store 0xffff to make the mutex unusable. Although the POSIX standard says it is undefined
    // behavior to destroy a locked mutex, we prefer not to change mutex->state in that situation.
    if (MUTEX_STATE_BITS_IS_UNLOCKED(old_state) &&
        atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, 0xffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
      return 0;
    }
    return EBUSY;
}