1 /*
2 This file is part of drd, a thread error detector.
3
4 Copyright (C) 2006-2012 Bart Van Assche <bvanassche@acm.org>.
5
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307, USA.
20
21 The GNU General Public License is contained in the file COPYING.
22 */
23
24
25 #include "drd_barrier.h"
26 #include "drd_clientobj.h"
27 #include "drd_error.h"
28 #include "drd_suppression.h"
29 #include "pub_tool_errormgr.h" // VG_(maybe_record_error)()
30 #include "pub_tool_libcassert.h" // tl_assert()
31 #include "pub_tool_libcprint.h" // VG_(printf)()
32 #include "pub_tool_machine.h" // VG_(get_IP)()
33 #include "pub_tool_mallocfree.h" // VG_(malloc)(), VG_(free)()
34 #include "pub_tool_oset.h"
35 #include "pub_tool_threadstate.h" // VG_(get_running_tid)()
36
37
38 /* Type definitions. */
39
40 /** Information associated with one thread participating in a barrier. */
41 struct barrier_thread_info
42 {
43 UWord tid; // A DrdThreadId declared as UWord because
44 // this member variable is the key of an OSet.
45 Segment* sg; // Segment of the last pthread_barrier() call
46 // by thread tid.
47 Segment* post_wait_sg; // Segment created after *_barrier_wait() finished
48 ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
49 Bool thread_finished;// Whether thread 'tid' has finished.
50 };
51
52
53 /* Local functions. */
54
55 static void barrier_cleanup(struct barrier_info* p);
56 static void barrier_delete_thread(struct barrier_info* const p,
57 const DrdThreadId tid);
58 static const char* barrier_get_typename(struct barrier_info* const p);
59 static const char* barrier_type_name(const BarrierT bt);
60 static
61 void barrier_report_wait_delete_race(const struct barrier_info* const p,
62 const struct barrier_thread_info* const q);
63
64
65 /* Local variables. */
66
67 static Bool s_trace_barrier = False;
68 static ULong s_barrier_segment_creation_count;
69
70
71 /* Function definitions. */
72
DRD_(barrier_set_trace)73 void DRD_(barrier_set_trace)(const Bool trace_barrier)
74 {
75 s_trace_barrier = trace_barrier;
76 }
77
78 /**
79 * Initialize the structure *p with the specified thread ID and iteration
80 * information.
81 */
82 static
DRD_(barrier_thread_initialize)83 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
84 const DrdThreadId tid)
85 {
86 p->tid = tid;
87 p->sg = NULL;
88 p->post_wait_sg = 0;
89 p->wait_call_ctxt = 0;
90 p->thread_finished = False;
91 }
92
93 /**
94 * Deallocate the memory that is owned by members of
95 * struct barrier_thread_info.
96 */
DRD_(barrier_thread_destroy)97 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
98 {
99 tl_assert(p);
100 DRD_(sg_put)(p->sg);
101 DRD_(sg_put)(p->post_wait_sg);
102 }
103
104 /**
105 * Initialize the structure *p with the specified client-side barrier address,
106 * barrier object size and number of participants in each barrier.
107 */
108 static
DRD_(barrier_initialize)109 void DRD_(barrier_initialize)(struct barrier_info* const p,
110 const Addr barrier,
111 const BarrierT barrier_type,
112 const Word count)
113 {
114 int i;
115
116 tl_assert(barrier != 0);
117 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
118 tl_assert(p->a1 == barrier);
119
120 p->cleanup = (void(*)(DrdClientobj*))barrier_cleanup;
121 p->delete_thread
122 = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
123 p->barrier_type = barrier_type;
124 p->count = count;
125 p->pre_iteration = 0;
126 p->post_iteration = 0;
127 p->pre_waiters_left = count;
128 p->post_waiters_left = count;
129
130 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
131 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
132 >= sizeof(DrdThreadId));
133 for (i = 0; i < 2; i++) {
134 p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
135 VG_(free));
136 }
137 }
138
139 /**
140 * Deallocate the memory owned by the struct barrier_info object and also
141 * all the nodes in the OSet p->oset.
142 *
143 * Called by clientobj_destroy().
144 */
barrier_cleanup(struct barrier_info * p)145 static void barrier_cleanup(struct barrier_info* p)
146 {
147 struct barrier_thread_info* q;
148 Segment* latest_sg = 0;
149 OSet* oset;
150 int i;
151
152 tl_assert(p);
153
154 DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
155 tl_assert(latest_sg);
156
157 if (p->pre_waiters_left != p->count) {
158 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
159 VG_(maybe_record_error)(VG_(get_running_tid)(),
160 BarrierErr,
161 VG_(get_IP)(VG_(get_running_tid)()),
162 "Destruction of barrier that is being waited"
163 " upon",
164 &bei);
165 } else {
166 oset = p->oset[1 - (p->pre_iteration & 1)];
167 VG_(OSetGen_ResetIter)(oset);
168 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
169 if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
170 &latest_sg->vc))
171 {
172 barrier_report_wait_delete_race(p, q);
173 }
174 DRD_(barrier_thread_destroy)(q);
175 }
176 }
177
178 for (i = 0; i < 2; i++) {
179 VG_(OSetGen_Destroy)(p->oset[i]);
180 p->oset[i] = NULL;
181 }
182
183 DRD_(sg_put)(latest_sg);
184 }
185
186 /**
187 * Look up the client-side barrier address barrier in s_barrier[]. If not
188 * found, add it.
189 */
190 static
191 struct barrier_info*
DRD_(barrier_get_or_allocate)192 DRD_(barrier_get_or_allocate)(const Addr barrier,
193 const BarrierT barrier_type, const Word count)
194 {
195 struct barrier_info *p;
196
197 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
198
199 tl_assert(offsetof(DrdClientobj, barrier) == 0);
200 p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
201 if (p == 0)
202 {
203 p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
204 DRD_(barrier_initialize)(p, barrier, barrier_type, count);
205 }
206 return p;
207 }
208
209 /**
210 * Look up the address of the struct barrier_info associated with the
211 * client-side barrier object.
212 */
DRD_(barrier_get)213 static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
214 {
215 tl_assert(offsetof(DrdClientobj, barrier) == 0);
216 return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
217 }
218
219 /**
220 * Initialize a barrier with given client address, barrier type and number of
221 * participants. The 'reinitialization' argument indicates whether a barrier
222 * object is being initialized or reinitialized.
223 *
224 * Called before pthread_barrier_init().
225 */
DRD_(barrier_init)226 void DRD_(barrier_init)(const Addr barrier,
227 const BarrierT barrier_type, const Word count,
228 const Bool reinitialization)
229 {
230 struct barrier_info* p;
231
232 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
233
234 if (count == 0)
235 {
236 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
237 VG_(maybe_record_error)(VG_(get_running_tid)(),
238 BarrierErr,
239 VG_(get_IP)(VG_(get_running_tid)()),
240 "pthread_barrier_init: 'count' argument is zero",
241 &bei);
242 }
243
244 if (! reinitialization && barrier_type == pthread_barrier)
245 {
246 p = DRD_(barrier_get)(barrier);
247 if (p)
248 {
249 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
250 VG_(maybe_record_error)(VG_(get_running_tid)(),
251 BarrierErr,
252 VG_(get_IP)(VG_(get_running_tid)()),
253 "Barrier reinitialization",
254 &bei);
255 }
256 }
257
258 p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
259
260 if (s_trace_barrier) {
261 if (reinitialization)
262 DRD_(trace_msg)("[%d] barrier_reinit %s 0x%lx count %ld -> %ld",
263 DRD_(thread_get_running_tid)(),
264 barrier_get_typename(p), barrier, p->count, count);
265 else
266 DRD_(trace_msg)("[%d] barrier_init %s 0x%lx",
267 DRD_(thread_get_running_tid)(),
268 barrier_get_typename(p),
269 barrier);
270 }
271
272 if (reinitialization && p->count != count)
273 {
274 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
275 {
276 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
277 VG_(maybe_record_error)(VG_(get_running_tid)(),
278 BarrierErr,
279 VG_(get_IP)(VG_(get_running_tid)()),
280 "Reinitialization of barrier with active"
281 " waiters",
282 &bei);
283 }
284 p->count = count;
285 }
286 }
287
288 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
DRD_(barrier_destroy)289 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
290 {
291 struct barrier_info* p;
292
293 p = DRD_(barrier_get)(barrier);
294
295 if (s_trace_barrier)
296 DRD_(trace_msg)("[%d] barrier_destroy %s 0x%lx",
297 DRD_(thread_get_running_tid)(),
298 barrier_get_typename(p), barrier);
299
300 if (p == 0)
301 {
302 GenericErrInfo GEI = {
303 .tid = DRD_(thread_get_running_tid)(),
304 .addr = barrier,
305 };
306 VG_(maybe_record_error)(VG_(get_running_tid)(),
307 GenericErr,
308 VG_(get_IP)(VG_(get_running_tid)()),
309 "Not a barrier",
310 &GEI);
311 return;
312 }
313
314 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
315 {
316 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
317 VG_(maybe_record_error)(VG_(get_running_tid)(),
318 BarrierErr,
319 VG_(get_IP)(VG_(get_running_tid)()),
320 "Destruction of a barrier with active waiters",
321 &bei);
322 }
323
324 DRD_(clientobj_remove)(p->a1, ClientBarrier);
325 }
326
327 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_pre_wait)328 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
329 const BarrierT barrier_type)
330 {
331 struct barrier_info* p;
332 struct barrier_thread_info* q;
333 const UWord word_tid = tid;
334 OSet* oset;
335
336 p = DRD_(barrier_get)(barrier);
337 if (p == 0 && barrier_type == gomp_barrier) {
338 /*
339 * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
340 * not. The only cause I know of that can trigger this is that libgomp.so
341 * has been compiled with --enable-linux-futex.
342 */
343 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
344 VG_(maybe_record_error)(VG_(get_running_tid)(),
345 BarrierErr,
346 VG_(get_IP)(VG_(get_running_tid)()),
347 "Please verify whether gcc has been configured"
348 " with option --disable-linux-futex. See also"
349 " the section about OpenMP in the DRD manual.",
350 &bei);
351 }
352 tl_assert(p);
353
354 if (s_trace_barrier)
355 DRD_(trace_msg)("[%d] barrier_pre_wait %s 0x%lx iteration %ld",
356 DRD_(thread_get_running_tid)(),
357 barrier_get_typename(p), barrier, p->pre_iteration);
358
359 /* Clean up nodes associated with finished threads. */
360 oset = p->oset[p->pre_iteration & 1];
361 tl_assert(oset);
362 VG_(OSetGen_ResetIter)(oset);
363 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
364 if (q->thread_finished) {
365 void* r = VG_(OSetGen_Remove)(oset, &q->tid);
366 tl_assert(r == q);
367 DRD_(barrier_thread_destroy)(q);
368 VG_(OSetGen_FreeNode)(oset, q);
369 VG_(OSetGen_ResetIterAt)(oset, &word_tid);
370 }
371 }
372 /* Allocate the per-thread data structure if necessary. */
373 q = VG_(OSetGen_Lookup)(oset, &word_tid);
374 if (q == NULL) {
375 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
376 DRD_(barrier_thread_initialize)(q, tid);
377 VG_(OSetGen_Insert)(oset, q);
378 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
379 }
380
381 /* Record *_barrier_wait() call context. */
382 q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
383
384 /*
385 * Store a pointer to the latest segment of the current thread in the
386 * per-thread data structure.
387 */
388 DRD_(thread_get_latest_segment)(&q->sg, tid);
389
390 /*
391 * If the same number of threads as the barrier count indicates have
392 * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
393 * reset the p->pre_waiters_left counter.
394 */
395 if (--p->pre_waiters_left <= 0)
396 {
397 p->pre_iteration++;
398 p->pre_waiters_left = p->count;
399 }
400 }
401
402 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_post_wait)403 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
404 const BarrierT barrier_type, const Bool waited,
405 const Bool serializing)
406 {
407 struct barrier_info* p;
408 const UWord word_tid = tid;
409 struct barrier_thread_info* q;
410 struct barrier_thread_info* r;
411 OSet* oset;
412
413 p = DRD_(barrier_get)(barrier);
414
415 if (s_trace_barrier)
416 DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
417 tid, p ? barrier_get_typename(p) : "(?)",
418 barrier, p ? p->post_iteration : -1,
419 serializing ? " (serializing)" : "");
420
421 /*
422 * If p == 0, this means that the barrier has been destroyed after
423 * *_barrier_wait() returned and before this function was called. Just
424 * return in that case -- race conditions between *_barrier_wait()
425 * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
426 */
427 if (p == 0)
428 return;
429
430 /* If the *_barrier_wait() call returned an error code, exit. */
431 if (! waited)
432 return;
433
434 oset = p->oset[p->post_iteration & 1];
435 q = VG_(OSetGen_Lookup)(oset, &word_tid);
436 if (p->pre_iteration - p->post_iteration > 1) {
437 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
438 VG_(maybe_record_error)(VG_(get_running_tid)(),
439 BarrierErr,
440 VG_(get_IP)(VG_(get_running_tid)()),
441 "Number of concurrent pthread_barrier_wait()"
442 " calls exceeds the barrier count",
443 &bei);
444 } else if (q == NULL) {
445 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
446 VG_(maybe_record_error)(VG_(get_running_tid)(),
447 BarrierErr,
448 VG_(get_IP)(VG_(get_running_tid)()),
449 "Error in barrier implementation"
450 " -- barrier_wait() started before"
451 " barrier_destroy() and finished after"
452 " barrier_destroy()",
453 &bei);
454 }
455 if (q == NULL) {
456 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
457 DRD_(barrier_thread_initialize)(q, tid);
458 VG_(OSetGen_Insert)(oset, q);
459 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
460 DRD_(thread_get_latest_segment)(&q->sg, tid);
461 }
462
463 /* Create a new segment and store a pointer to that segment. */
464 DRD_(thread_new_segment)(tid);
465 DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
466 s_barrier_segment_creation_count++;
467
468 /*
469 * Combine all vector clocks that were stored in the pre_barrier_wait
470 * wrapper with the vector clock of the current thread.
471 */
472 {
473 VectorClock old_vc;
474
475 DRD_(vc_copy)(&old_vc, DRD_(thread_get_vc)(tid));
476 VG_(OSetGen_ResetIter)(oset);
477 for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
478 {
479 if (r != q)
480 {
481 tl_assert(r->sg);
482 DRD_(vc_combine)(DRD_(thread_get_vc)(tid), &r->sg->vc);
483 }
484 }
485 DRD_(thread_update_conflict_set)(tid, &old_vc);
486 DRD_(vc_cleanup)(&old_vc);
487 }
488
489 /*
490 * If the same number of threads as the barrier count indicates have
491 * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
492 * reset the p->post_waiters_left counter.
493 */
494 if (--p->post_waiters_left <= 0)
495 {
496 p->post_iteration++;
497 p->post_waiters_left = p->count;
498 }
499 }
500
501 /** Called when thread tid stops to exist. */
barrier_delete_thread(struct barrier_info * const p,const DrdThreadId tid)502 static void barrier_delete_thread(struct barrier_info* const p,
503 const DrdThreadId tid)
504 {
505 struct barrier_thread_info* q;
506 const UWord word_tid = tid;
507 int i;
508
509 for (i = 0; i < 2; i++) {
510 q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
511 if (q)
512 q->thread_finished = True;
513 }
514 }
515
516 /**
517 * Report that *_barrier_destroy() has been called but that this call was
518 * not synchronized with the last *_barrier_wait() call on the same barrier.
519 *
520 * This topic has been discussed extensively on comp.programming.threads
521 * (February 3, 2009). See also
522 * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
523 */
524 static
barrier_report_wait_delete_race(const struct barrier_info * const p,const struct barrier_thread_info * const q)525 void barrier_report_wait_delete_race(const struct barrier_info* const p,
526 const struct barrier_thread_info* const q)
527 {
528 tl_assert(p);
529 tl_assert(q);
530
531 {
532 BarrierErrInfo bei
533 = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
534 VG_(maybe_record_error)(VG_(get_running_tid)(),
535 BarrierErr,
536 VG_(get_IP)(VG_(get_running_tid)()),
537 "Destruction of barrier not synchronized with"
538 " barrier wait call",
539 &bei);
540 }
541 }
542
barrier_get_typename(struct barrier_info * const p)543 static const char* barrier_get_typename(struct barrier_info* const p)
544 {
545 tl_assert(p);
546
547 return barrier_type_name(p->barrier_type);
548 }
549
barrier_type_name(const BarrierT bt)550 static const char* barrier_type_name(const BarrierT bt)
551 {
552 switch (bt)
553 {
554 case pthread_barrier:
555 return "pthread barrier";
556 case gomp_barrier:
557 return "gomp barrier";
558 }
559 return "?";
560 }
561
DRD_(get_barrier_segment_creation_count)562 ULong DRD_(get_barrier_segment_creation_count)(void)
563 {
564 return s_barrier_segment_creation_count;
565 }
566