1 /* -*- mode: C; c-basic-offset: 3; indent-tabs-mode: nil; -*- */
2 /*
3 This file is part of drd, a thread error detector.
4
5 Copyright (C) 2006-2011 Bart Van Assche <bvanassche@acm.org>.
6
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307, USA.
21
22 The GNU General Public License is contained in the file COPYING.
23 */
24
25
26 #include "drd_barrier.h"
27 #include "drd_clientobj.h"
28 #include "drd_error.h"
29 #include "drd_suppression.h"
30 #include "pub_tool_errormgr.h" // VG_(maybe_record_error)()
31 #include "pub_tool_libcassert.h" // tl_assert()
32 #include "pub_tool_libcprint.h" // VG_(printf)()
33 #include "pub_tool_machine.h" // VG_(get_IP)()
34 #include "pub_tool_mallocfree.h" // VG_(malloc)(), VG_(free)()
35 #include "pub_tool_oset.h"
36 #include "pub_tool_threadstate.h" // VG_(get_running_tid)()
37
38
39 /* Type definitions. */
40
41 /** Information associated with one thread participating in a barrier. */
42 struct barrier_thread_info
43 {
44 UWord tid; // A DrdThreadId declared as UWord because
45 // this member variable is the key of an OSet.
46 Segment* sg; // Segment of the last pthread_barrier() call
47 // by thread tid.
48 Segment* post_wait_sg; // Segment created after *_barrier_wait() finished
49 ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
50 Bool thread_finished;// Whether thread 'tid' has finished.
51 };
52
53
54 /* Local functions. */
55
56 static void barrier_cleanup(struct barrier_info* p);
57 static void barrier_delete_thread(struct barrier_info* const p,
58 const DrdThreadId tid);
59 static const char* barrier_get_typename(struct barrier_info* const p);
60 static const char* barrier_type_name(const BarrierT bt);
61 static
62 void barrier_report_wait_delete_race(const struct barrier_info* const p,
63 const struct barrier_thread_info* const q);
64
65
66 /* Local variables. */
67
68 static Bool s_trace_barrier = False;
69 static ULong s_barrier_segment_creation_count;
70
71
72 /* Function definitions. */
73
DRD_(barrier_set_trace)74 void DRD_(barrier_set_trace)(const Bool trace_barrier)
75 {
76 s_trace_barrier = trace_barrier;
77 }
78
79 /**
80 * Initialize the structure *p with the specified thread ID and iteration
81 * information.
82 */
83 static
DRD_(barrier_thread_initialize)84 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
85 const DrdThreadId tid)
86 {
87 p->tid = tid;
88 p->sg = NULL;
89 p->post_wait_sg = 0;
90 p->wait_call_ctxt = 0;
91 p->thread_finished = False;
92 }
93
94 /**
95 * Deallocate the memory that is owned by members of
96 * struct barrier_thread_info.
97 */
DRD_(barrier_thread_destroy)98 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
99 {
100 tl_assert(p);
101 DRD_(sg_put)(p->sg);
102 DRD_(sg_put)(p->post_wait_sg);
103 }
104
105 /**
106 * Initialize the structure *p with the specified client-side barrier address,
107 * barrier object size and number of participants in each barrier.
108 */
109 static
DRD_(barrier_initialize)110 void DRD_(barrier_initialize)(struct barrier_info* const p,
111 const Addr barrier,
112 const BarrierT barrier_type,
113 const Word count)
114 {
115 int i;
116
117 tl_assert(barrier != 0);
118 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
119 tl_assert(p->a1 == barrier);
120
121 p->cleanup = (void(*)(DrdClientobj*))barrier_cleanup;
122 p->delete_thread
123 = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
124 p->barrier_type = barrier_type;
125 p->count = count;
126 p->pre_iteration = 0;
127 p->post_iteration = 0;
128 p->pre_waiters_left = count;
129 p->post_waiters_left = count;
130
131 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
132 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
133 >= sizeof(DrdThreadId));
134 for (i = 0; i < 2; i++) {
135 p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
136 VG_(free));
137 }
138 }
139
140 /**
141 * Deallocate the memory owned by the struct barrier_info object and also
142 * all the nodes in the OSet p->oset.
143 *
144 * Called by clientobj_destroy().
145 */
barrier_cleanup(struct barrier_info * p)146 static void barrier_cleanup(struct barrier_info* p)
147 {
148 struct barrier_thread_info* q;
149 Segment* latest_sg = 0;
150 OSet* oset;
151 int i;
152
153 tl_assert(p);
154
155 DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
156 tl_assert(latest_sg);
157
158 if (p->pre_waiters_left != p->count) {
159 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
160 VG_(maybe_record_error)(VG_(get_running_tid)(),
161 BarrierErr,
162 VG_(get_IP)(VG_(get_running_tid)()),
163 "Destruction of barrier that is being waited"
164 " upon",
165 &bei);
166 } else {
167 oset = p->oset[1 - (p->pre_iteration & 1)];
168 VG_(OSetGen_ResetIter)(oset);
169 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
170 if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
171 &latest_sg->vc))
172 {
173 barrier_report_wait_delete_race(p, q);
174 }
175 DRD_(barrier_thread_destroy)(q);
176 }
177 }
178
179 for (i = 0; i < 2; i++) {
180 VG_(OSetGen_Destroy)(p->oset[i]);
181 p->oset[i] = NULL;
182 }
183
184 DRD_(sg_put)(latest_sg);
185 }
186
187 /**
188 * Look up the client-side barrier address barrier in s_barrier[]. If not
189 * found, add it.
190 */
191 static
192 struct barrier_info*
DRD_(barrier_get_or_allocate)193 DRD_(barrier_get_or_allocate)(const Addr barrier,
194 const BarrierT barrier_type, const Word count)
195 {
196 struct barrier_info *p;
197
198 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
199
200 tl_assert(offsetof(DrdClientobj, barrier) == 0);
201 p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
202 if (p == 0)
203 {
204 p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
205 DRD_(barrier_initialize)(p, barrier, barrier_type, count);
206 }
207 return p;
208 }
209
210 /**
211 * Look up the address of the struct barrier_info associated with the
212 * client-side barrier object.
213 */
DRD_(barrier_get)214 static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
215 {
216 tl_assert(offsetof(DrdClientobj, barrier) == 0);
217 return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
218 }
219
220 /**
221 * Initialize a barrier with given client address, barrier type and number of
222 * participants. The 'reinitialization' argument indicates whether a barrier
223 * object is being initialized or reinitialized.
224 *
225 * Called before pthread_barrier_init().
226 */
DRD_(barrier_init)227 void DRD_(barrier_init)(const Addr barrier,
228 const BarrierT barrier_type, const Word count,
229 const Bool reinitialization)
230 {
231 struct barrier_info* p;
232
233 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
234
235 if (count == 0)
236 {
237 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
238 VG_(maybe_record_error)(VG_(get_running_tid)(),
239 BarrierErr,
240 VG_(get_IP)(VG_(get_running_tid)()),
241 "pthread_barrier_init: 'count' argument is zero",
242 &bei);
243 }
244
245 if (! reinitialization && barrier_type == pthread_barrier)
246 {
247 p = DRD_(barrier_get)(barrier);
248 if (p)
249 {
250 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
251 VG_(maybe_record_error)(VG_(get_running_tid)(),
252 BarrierErr,
253 VG_(get_IP)(VG_(get_running_tid)()),
254 "Barrier reinitialization",
255 &bei);
256 }
257 }
258
259 p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
260
261 if (s_trace_barrier) {
262 if (reinitialization)
263 DRD_(trace_msg)("[%d] barrier_reinit %s 0x%lx count %ld -> %ld",
264 DRD_(thread_get_running_tid)(),
265 barrier_get_typename(p), barrier, p->count, count);
266 else
267 DRD_(trace_msg)("[%d] barrier_init %s 0x%lx",
268 DRD_(thread_get_running_tid)(),
269 barrier_get_typename(p),
270 barrier);
271 }
272
273 if (reinitialization && p->count != count)
274 {
275 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
276 {
277 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
278 VG_(maybe_record_error)(VG_(get_running_tid)(),
279 BarrierErr,
280 VG_(get_IP)(VG_(get_running_tid)()),
281 "Reinitialization of barrier with active"
282 " waiters",
283 &bei);
284 }
285 p->count = count;
286 }
287 }
288
289 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
DRD_(barrier_destroy)290 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
291 {
292 struct barrier_info* p;
293
294 p = DRD_(barrier_get)(barrier);
295
296 if (s_trace_barrier)
297 DRD_(trace_msg)("[%d] barrier_destroy %s 0x%lx",
298 DRD_(thread_get_running_tid)(),
299 barrier_get_typename(p), barrier);
300
301 if (p == 0)
302 {
303 GenericErrInfo GEI = {
304 .tid = DRD_(thread_get_running_tid)(),
305 .addr = barrier,
306 };
307 VG_(maybe_record_error)(VG_(get_running_tid)(),
308 GenericErr,
309 VG_(get_IP)(VG_(get_running_tid)()),
310 "Not a barrier",
311 &GEI);
312 return;
313 }
314
315 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
316 {
317 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
318 VG_(maybe_record_error)(VG_(get_running_tid)(),
319 BarrierErr,
320 VG_(get_IP)(VG_(get_running_tid)()),
321 "Destruction of a barrier with active waiters",
322 &bei);
323 }
324
325 DRD_(clientobj_remove)(p->a1, ClientBarrier);
326 }
327
328 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_pre_wait)329 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
330 const BarrierT barrier_type)
331 {
332 struct barrier_info* p;
333 struct barrier_thread_info* q;
334 const UWord word_tid = tid;
335 OSet* oset;
336
337 p = DRD_(barrier_get)(barrier);
338 if (p == 0 && barrier_type == gomp_barrier) {
339 /*
340 * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
341 * not. The only cause I know of that can trigger this is that libgomp.so
342 * has been compiled with --enable-linux-futex.
343 */
344 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
345 VG_(maybe_record_error)(VG_(get_running_tid)(),
346 BarrierErr,
347 VG_(get_IP)(VG_(get_running_tid)()),
348 "Please verify whether gcc has been configured"
349 " with option --disable-linux-futex. See also"
350 " the section about OpenMP in the DRD manual.",
351 &bei);
352 }
353 tl_assert(p);
354
355 if (s_trace_barrier)
356 DRD_(trace_msg)("[%d] barrier_pre_wait %s 0x%lx iteration %ld",
357 DRD_(thread_get_running_tid)(),
358 barrier_get_typename(p), barrier, p->pre_iteration);
359
360 /* Clean up nodes associated with finished threads. */
361 oset = p->oset[p->pre_iteration & 1];
362 tl_assert(oset);
363 VG_(OSetGen_ResetIter)(oset);
364 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
365 if (q->thread_finished) {
366 void* r = VG_(OSetGen_Remove)(oset, &q->tid);
367 tl_assert(r == q);
368 DRD_(barrier_thread_destroy)(q);
369 VG_(OSetGen_FreeNode)(oset, q);
370 VG_(OSetGen_ResetIterAt)(oset, &word_tid);
371 }
372 }
373 /* Allocate the per-thread data structure if necessary. */
374 q = VG_(OSetGen_Lookup)(oset, &word_tid);
375 if (q == NULL) {
376 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
377 DRD_(barrier_thread_initialize)(q, tid);
378 VG_(OSetGen_Insert)(oset, q);
379 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
380 }
381
382 /* Record *_barrier_wait() call context. */
383 q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
384
385 /*
386 * Store a pointer to the latest segment of the current thread in the
387 * per-thread data structure.
388 */
389 DRD_(thread_get_latest_segment)(&q->sg, tid);
390
391 /*
392 * If the same number of threads as the barrier count indicates have
393 * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
394 * reset the p->pre_waiters_left counter.
395 */
396 if (--p->pre_waiters_left <= 0)
397 {
398 p->pre_iteration++;
399 p->pre_waiters_left = p->count;
400 }
401 }
402
403 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_post_wait)404 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
405 const BarrierT barrier_type, const Bool waited,
406 const Bool serializing)
407 {
408 struct barrier_info* p;
409 const UWord word_tid = tid;
410 struct barrier_thread_info* q;
411 struct barrier_thread_info* r;
412 OSet* oset;
413
414 p = DRD_(barrier_get)(barrier);
415
416 if (s_trace_barrier)
417 DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
418 tid, p ? barrier_get_typename(p) : "(?)",
419 barrier, p ? p->post_iteration : -1,
420 serializing ? " (serializing)" : "");
421
422 /*
423 * If p == 0, this means that the barrier has been destroyed after
424 * *_barrier_wait() returned and before this function was called. Just
425 * return in that case -- race conditions between *_barrier_wait()
426 * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
427 */
428 if (p == 0)
429 return;
430
431 /* If the *_barrier_wait() call returned an error code, exit. */
432 if (! waited)
433 return;
434
435 oset = p->oset[p->post_iteration & 1];
436 q = VG_(OSetGen_Lookup)(oset, &word_tid);
437 if (p->pre_iteration - p->post_iteration > 1) {
438 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
439 VG_(maybe_record_error)(VG_(get_running_tid)(),
440 BarrierErr,
441 VG_(get_IP)(VG_(get_running_tid)()),
442 "Number of concurrent pthread_barrier_wait()"
443 " calls exceeds the barrier count",
444 &bei);
445 } else if (q == NULL) {
446 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
447 VG_(maybe_record_error)(VG_(get_running_tid)(),
448 BarrierErr,
449 VG_(get_IP)(VG_(get_running_tid)()),
450 "Error in barrier implementation"
451 " -- barrier_wait() started before"
452 " barrier_destroy() and finished after"
453 " barrier_destroy()",
454 &bei);
455 }
456 if (q == NULL) {
457 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
458 DRD_(barrier_thread_initialize)(q, tid);
459 VG_(OSetGen_Insert)(oset, q);
460 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
461 DRD_(thread_get_latest_segment)(&q->sg, tid);
462 }
463
464 /* Create a new segment and store a pointer to that segment. */
465 DRD_(thread_new_segment)(tid);
466 DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
467 s_barrier_segment_creation_count++;
468
469 /*
470 * Combine all vector clocks that were stored in the pre_barrier_wait
471 * wrapper with the vector clock of the current thread.
472 */
473 {
474 VectorClock old_vc;
475
476 DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc);
477 VG_(OSetGen_ResetIter)(oset);
478 for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
479 {
480 if (r != q)
481 {
482 tl_assert(r->sg);
483 DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc,
484 &r->sg->vc);
485 }
486 }
487 DRD_(thread_update_conflict_set)(tid, &old_vc);
488 DRD_(vc_cleanup)(&old_vc);
489 }
490
491 /*
492 * If the same number of threads as the barrier count indicates have
493 * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
494 * reset the p->post_waiters_left counter.
495 */
496 if (--p->post_waiters_left <= 0)
497 {
498 p->post_iteration++;
499 p->post_waiters_left = p->count;
500 }
501 }
502
503 /** Called when thread tid stops to exist. */
barrier_delete_thread(struct barrier_info * const p,const DrdThreadId tid)504 static void barrier_delete_thread(struct barrier_info* const p,
505 const DrdThreadId tid)
506 {
507 struct barrier_thread_info* q;
508 const UWord word_tid = tid;
509 int i;
510
511 for (i = 0; i < 2; i++) {
512 q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
513 if (q)
514 q->thread_finished = True;
515 }
516 }
517
518 /**
519 * Report that *_barrier_destroy() has been called but that this call was
520 * not synchronized with the last *_barrier_wait() call on the same barrier.
521 *
522 * This topic has been discussed extensively on comp.programming.threads
523 * (February 3, 2009). See also
524 * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
525 */
526 static
barrier_report_wait_delete_race(const struct barrier_info * const p,const struct barrier_thread_info * const q)527 void barrier_report_wait_delete_race(const struct barrier_info* const p,
528 const struct barrier_thread_info* const q)
529 {
530 tl_assert(p);
531 tl_assert(q);
532
533 {
534 BarrierErrInfo bei
535 = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
536 VG_(maybe_record_error)(VG_(get_running_tid)(),
537 BarrierErr,
538 VG_(get_IP)(VG_(get_running_tid)()),
539 "Destruction of barrier not synchronized with"
540 " barrier wait call",
541 &bei);
542 }
543 }
544
barrier_get_typename(struct barrier_info * const p)545 static const char* barrier_get_typename(struct barrier_info* const p)
546 {
547 tl_assert(p);
548
549 return barrier_type_name(p->barrier_type);
550 }
551
barrier_type_name(const BarrierT bt)552 static const char* barrier_type_name(const BarrierT bt)
553 {
554 switch (bt)
555 {
556 case pthread_barrier:
557 return "pthread barrier";
558 case gomp_barrier:
559 return "gomp barrier";
560 }
561 return "?";
562 }
563
DRD_(get_barrier_segment_creation_count)564 ULong DRD_(get_barrier_segment_creation_count)(void)
565 {
566 return s_barrier_segment_creation_count;
567 }
568