#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
# elif defined(__ppc__)
	asm volatile("lwsync");
# elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
# else
	__sync_synchronize();
# endif
	asm volatile("" ::: "memory");
}
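
/*
 * For reference: the memory orders above mirror C11's, so atomic_fence() is
 * roughly the hand-rolled counterpart of atomic_thread_fence() from
 * <stdatomic.h>.  A minimal sketch of the correspondence, assuming a
 * C11-capable compiler:
 *
 *	atomic_thread_fence(memory_order_acquire);
 *	// ~ atomic_fence(atomic_memory_order_acquire): a compiler barrier on
 *	//   x86, lwsync on ppc, a membar on sparc64, __sync_synchronize()
 *	//   elsewhere.
 *	atomic_thread_fence(memory_order_seq_cst);
 *	// ~ atomic_fence(atomic_memory_order_seq_cst): always a full
 *	//   __sync_synchronize().
 */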

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */
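
/*
 * A minimal sketch of what that means for the seq_cst operations generated
 * below, assuming a weakly ordered (non-x86, non-sparc64) target:
 *
 *	// atomic_store_*(a, v, seq_cst):
 *	atomic_fence(atomic_memory_order_release);	// release fence
 *	a->repr = v;
 *	atomic_post_sc_store_fence();			// compiler barrier only
 *
 *	// atomic_load_*(a, seq_cst):
 *	atomic_pre_sc_load_fence();			// full barrier
 *	type r = a->repr;
 *	atomic_fence(atomic_memory_order_acquire);	// acquire fence
 *
 * On x86 and sparc64 the placement flips: the pre-load fence degenerates to a
 * compiler barrier, and the full barrier follows the store instead.
 */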

ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
# else
	atomic_fence(atomic_memory_order_seq_cst);
# endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
# else
	atomic_fence(atomic_memory_order_relaxed);
# endif
}

#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
typedef struct { \
	type volatile repr; \
} atomic_##short_type##_t; \
 \
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
    atomic_memory_order_t mo) { \
	if (mo == atomic_memory_order_seq_cst) { \
		atomic_pre_sc_load_fence(); \
	} \
	type result = a->repr; \
	if (mo != atomic_memory_order_relaxed) { \
		atomic_fence(atomic_memory_order_acquire); \
	} \
	return result; \
} \
 \
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, \
    type val, atomic_memory_order_t mo) { \
	if (mo != atomic_memory_order_relaxed) { \
		atomic_fence(atomic_memory_order_release); \
	} \
	a->repr = val; \
	if (mo == atomic_memory_order_seq_cst) { \
		atomic_post_sc_store_fence(); \
	} \
} \
 \
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	/* \
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have \
	 * an atomic exchange builtin.  We fake it with a CAS loop. \
	 */ \
	while (true) { \
		type old = a->repr; \
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
			return old; \
		} \
	} \
} \
 \
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
	    desired); \
	if (prev == *expected) { \
		return true; \
	} else { \
		*expected = prev; \
		return false; \
	} \
} \
 \
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
	    desired); \
	if (prev == *expected) { \
		return true; \
	} else { \
		*expected = prev; \
		return false; \
	} \
}
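
/*
 * Usage sketch for the generated operations.  The "foo" instantiation here is
 * hypothetical; it only illustrates the names and signatures the macro
 * produces.  Note that success_mo/failure_mo are accepted but unused in this
 * backend, since the __sync CAS builtin already implies a full barrier:
 *
 *	typedef uint32_t foo_t;
 *	JEMALLOC_GENERATE_ATOMICS(foo_t, foo, 2)
 *
 *	atomic_foo_t x = ATOMIC_INIT(0);
 *	foo_t expected = 0;
 *	if (atomic_compare_exchange_strong_foo(&x, &expected, 1,
 *	    atomic_memory_order_acq_rel, atomic_memory_order_relaxed)) {
 *		// CAS succeeded; x now holds 1.
 *	} else {
 *		// CAS failed; expected was updated to the observed value.
 *	}
 */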

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
 \
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_add(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_sub(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_and(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_or(&a->repr, val); \
} \
 \
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_xor(&a->repr, val); \
}
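
/*
 * Usage sketch for the integer variants.  The "u32" instantiation is
 * hypothetical here; the point is that each fetch_* operation returns the
 * value held before the update, as the underlying __sync_fetch_and_*
 * builtins do (mo is accepted but unused, since those builtins already act
 * as full barriers):
 *
 *	JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
 *
 *	atomic_u32_t counter = ATOMIC_INIT(0);
 *	uint32_t old = atomic_fetch_add_u32(&counter, 1,
 *	    atomic_memory_order_relaxed);
 *	// old == 0, counter.repr == 1
 */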

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */