• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mode: C; c-basic-offset: 3; indent-tabs-mode: nil; -*- */
2 /*
3   This file is part of drd, a thread error detector.
4 
5   Copyright (C) 2006-2011 Bart Van Assche <bvanassche@acm.org>.
6 
7   This program is free software; you can redistribute it and/or
8   modify it under the terms of the GNU General Public License as
9   published by the Free Software Foundation; either version 2 of the
10   License, or (at your option) any later version.
11 
12   This program is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   General Public License for more details.
16 
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20   02111-1307, USA.
21 
22   The GNU General Public License is contained in the file COPYING.
23 */
24 
25 
26 #include "drd_barrier.h"
27 #include "drd_clientobj.h"
28 #include "drd_error.h"
29 #include "drd_suppression.h"
30 #include "pub_tool_errormgr.h"    // VG_(maybe_record_error)()
31 #include "pub_tool_libcassert.h"  // tl_assert()
32 #include "pub_tool_libcprint.h"   // VG_(printf)()
33 #include "pub_tool_machine.h"     // VG_(get_IP)()
34 #include "pub_tool_mallocfree.h"  // VG_(malloc)(), VG_(free)()
35 #include "pub_tool_oset.h"
36 #include "pub_tool_threadstate.h" // VG_(get_running_tid)()
37 
38 
39 /* Type definitions. */
40 
41 /** Information associated with one thread participating in a barrier. */
42 struct barrier_thread_info
43 {
44    UWord       tid;           // A DrdThreadId declared as UWord because
45                               // this member variable is the key of an OSet.
46    Segment*    sg;            // Segment of the last pthread_barrier() call
47                               // by thread tid.
48    Segment*    post_wait_sg;  // Segment created after *_barrier_wait() finished
49    ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
50    Bool       thread_finished;// Whether thread 'tid' has finished.
51 };
52 
53 
54 /* Local functions. */
55 
56 static void barrier_cleanup(struct barrier_info* p);
57 static void barrier_delete_thread(struct barrier_info* const p,
58                                   const DrdThreadId tid);
59 static const char* barrier_get_typename(struct barrier_info* const p);
60 static const char* barrier_type_name(const BarrierT bt);
61 static
62 void barrier_report_wait_delete_race(const struct barrier_info* const p,
63                                      const struct barrier_thread_info* const q);
64 
65 
66 /* Local variables. */
67 
68 static Bool  s_trace_barrier = False;
69 static ULong s_barrier_segment_creation_count;
70 
71 
72 /* Function definitions. */
73 
DRD_(barrier_set_trace)74 void DRD_(barrier_set_trace)(const Bool trace_barrier)
75 {
76    s_trace_barrier = trace_barrier;
77 }
78 
79 /**
80  * Initialize the structure *p with the specified thread ID and iteration
81  * information.
82  */
83 static
DRD_(barrier_thread_initialize)84 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
85                                      const DrdThreadId tid)
86 {
87    p->tid             = tid;
88    p->sg              = NULL;
89    p->post_wait_sg    = 0;
90    p->wait_call_ctxt  = 0;
91    p->thread_finished = False;
92 }
93 
94 /**
95  * Deallocate the memory that is owned by members of
96  * struct barrier_thread_info.
97  */
DRD_(barrier_thread_destroy)98 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
99 {
100    tl_assert(p);
101    DRD_(sg_put)(p->sg);
102    DRD_(sg_put)(p->post_wait_sg);
103 }
104 
105 /**
106  * Initialize the structure *p with the specified client-side barrier address,
107  * barrier object size and number of participants in each barrier.
108  */
109 static
DRD_(barrier_initialize)110 void DRD_(barrier_initialize)(struct barrier_info* const p,
111                               const Addr barrier,
112                               const BarrierT barrier_type,
113                               const Word count)
114 {
115    int i;
116 
117    tl_assert(barrier != 0);
118    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
119    tl_assert(p->a1 == barrier);
120 
121    p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
122    p->delete_thread
123       = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
124    p->barrier_type      = barrier_type;
125    p->count             = count;
126    p->pre_iteration     = 0;
127    p->post_iteration    = 0;
128    p->pre_waiters_left  = count;
129    p->post_waiters_left = count;
130 
131    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
132    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
133              >= sizeof(DrdThreadId));
134    for (i = 0; i < 2; i++) {
135       p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
136                                        VG_(free));
137    }
138 }
139 
140 /**
141  * Deallocate the memory owned by the struct barrier_info object and also
142  * all the nodes in the OSet p->oset.
143  *
144  * Called by clientobj_destroy().
145  */
barrier_cleanup(struct barrier_info * p)146 static void barrier_cleanup(struct barrier_info* p)
147 {
148    struct barrier_thread_info* q;
149    Segment* latest_sg = 0;
150    OSet* oset;
151    int i;
152 
153    tl_assert(p);
154 
155    DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
156    tl_assert(latest_sg);
157 
158    if (p->pre_waiters_left != p->count) {
159       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
160       VG_(maybe_record_error)(VG_(get_running_tid)(),
161                               BarrierErr,
162                               VG_(get_IP)(VG_(get_running_tid)()),
163                               "Destruction of barrier that is being waited"
164                               " upon",
165                               &bei);
166    } else {
167       oset = p->oset[1 - (p->pre_iteration & 1)];
168       VG_(OSetGen_ResetIter)(oset);
169       for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
170          if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
171                                               &latest_sg->vc))
172          {
173             barrier_report_wait_delete_race(p, q);
174          }
175          DRD_(barrier_thread_destroy)(q);
176       }
177    }
178 
179    for (i = 0; i < 2; i++) {
180       VG_(OSetGen_Destroy)(p->oset[i]);
181       p->oset[i] = NULL;
182    }
183 
184    DRD_(sg_put)(latest_sg);
185 }
186 
187 /**
188  * Look up the client-side barrier address barrier in s_barrier[]. If not
189  * found, add it.
190  */
191 static
192 struct barrier_info*
DRD_(barrier_get_or_allocate)193 DRD_(barrier_get_or_allocate)(const Addr barrier,
194                               const BarrierT barrier_type, const Word count)
195 {
196    struct barrier_info *p;
197 
198    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
199 
200    tl_assert(offsetof(DrdClientobj, barrier) == 0);
201    p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
202    if (p == 0)
203    {
204       p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
205       DRD_(barrier_initialize)(p, barrier, barrier_type, count);
206    }
207    return p;
208 }
209 
210 /**
211  * Look up the address of the struct barrier_info associated with the
212  * client-side barrier object.
213  */
DRD_(barrier_get)214 static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
215 {
216    tl_assert(offsetof(DrdClientobj, barrier) == 0);
217    return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
218 }
219 
220 /**
221  * Initialize a barrier with given client address, barrier type and number of
222  * participants. The 'reinitialization' argument indicates whether a barrier
223  * object is being initialized or reinitialized.
224  *
225  * Called before pthread_barrier_init().
226  */
DRD_(barrier_init)227 void DRD_(barrier_init)(const Addr barrier,
228                         const BarrierT barrier_type, const Word count,
229                         const Bool reinitialization)
230 {
231    struct barrier_info* p;
232 
233    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
234 
235    if (count == 0)
236    {
237       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
238       VG_(maybe_record_error)(VG_(get_running_tid)(),
239                               BarrierErr,
240                               VG_(get_IP)(VG_(get_running_tid)()),
241                               "pthread_barrier_init: 'count' argument is zero",
242                               &bei);
243    }
244 
245    if (! reinitialization && barrier_type == pthread_barrier)
246    {
247       p = DRD_(barrier_get)(barrier);
248       if (p)
249       {
250          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
251          VG_(maybe_record_error)(VG_(get_running_tid)(),
252                                  BarrierErr,
253                                  VG_(get_IP)(VG_(get_running_tid)()),
254                                  "Barrier reinitialization",
255                                  &bei);
256       }
257    }
258 
259    p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
260 
261    if (s_trace_barrier) {
262       if (reinitialization)
263          DRD_(trace_msg)("[%d] barrier_reinit    %s 0x%lx count %ld -> %ld",
264                          DRD_(thread_get_running_tid)(),
265                          barrier_get_typename(p), barrier, p->count, count);
266       else
267          DRD_(trace_msg)("[%d] barrier_init      %s 0x%lx",
268                          DRD_(thread_get_running_tid)(),
269                          barrier_get_typename(p),
270                          barrier);
271    }
272 
273    if (reinitialization && p->count != count)
274    {
275       if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
276       {
277          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
278          VG_(maybe_record_error)(VG_(get_running_tid)(),
279                                  BarrierErr,
280                                  VG_(get_IP)(VG_(get_running_tid)()),
281                                  "Reinitialization of barrier with active"
282                                  " waiters",
283                                  &bei);
284       }
285       p->count = count;
286    }
287 }
288 
289 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
DRD_(barrier_destroy)290 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
291 {
292    struct barrier_info* p;
293 
294    p = DRD_(barrier_get)(barrier);
295 
296    if (s_trace_barrier)
297       DRD_(trace_msg)("[%d] barrier_destroy   %s 0x%lx",
298                       DRD_(thread_get_running_tid)(),
299                       barrier_get_typename(p), barrier);
300 
301    if (p == 0)
302    {
303       GenericErrInfo GEI = {
304 	 .tid = DRD_(thread_get_running_tid)(),
305 	 .addr = barrier,
306       };
307       VG_(maybe_record_error)(VG_(get_running_tid)(),
308                               GenericErr,
309                               VG_(get_IP)(VG_(get_running_tid)()),
310                               "Not a barrier",
311                               &GEI);
312       return;
313    }
314 
315    if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
316    {
317       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
318       VG_(maybe_record_error)(VG_(get_running_tid)(),
319                               BarrierErr,
320                               VG_(get_IP)(VG_(get_running_tid)()),
321                               "Destruction of a barrier with active waiters",
322                               &bei);
323    }
324 
325    DRD_(clientobj_remove)(p->a1, ClientBarrier);
326 }
327 
328 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_pre_wait)329 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
330                             const BarrierT barrier_type)
331 {
332    struct barrier_info* p;
333    struct barrier_thread_info* q;
334    const UWord word_tid = tid;
335    OSet* oset;
336 
337    p = DRD_(barrier_get)(barrier);
338    if (p == 0 && barrier_type == gomp_barrier) {
339       /*
340        * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
341        * not. The only cause I know of that can trigger this is that libgomp.so
342        * has been compiled with --enable-linux-futex.
343        */
344       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
345       VG_(maybe_record_error)(VG_(get_running_tid)(),
346                               BarrierErr,
347                               VG_(get_IP)(VG_(get_running_tid)()),
348                               "Please verify whether gcc has been configured"
349                               " with option --disable-linux-futex. See also"
350                               " the section about OpenMP in the DRD manual.",
351                               &bei);
352    }
353    tl_assert(p);
354 
355    if (s_trace_barrier)
356       DRD_(trace_msg)("[%d] barrier_pre_wait  %s 0x%lx iteration %ld",
357                       DRD_(thread_get_running_tid)(),
358                       barrier_get_typename(p), barrier, p->pre_iteration);
359 
360    /* Clean up nodes associated with finished threads. */
361    oset = p->oset[p->pre_iteration & 1];
362    tl_assert(oset);
363    VG_(OSetGen_ResetIter)(oset);
364    for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
365       if (q->thread_finished) {
366          void* r = VG_(OSetGen_Remove)(oset, &q->tid);
367          tl_assert(r == q);
368          DRD_(barrier_thread_destroy)(q);
369          VG_(OSetGen_FreeNode)(oset, q);
370          VG_(OSetGen_ResetIterAt)(oset, &word_tid);
371       }
372    }
373    /* Allocate the per-thread data structure if necessary. */
374    q = VG_(OSetGen_Lookup)(oset, &word_tid);
375    if (q == NULL) {
376       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
377       DRD_(barrier_thread_initialize)(q, tid);
378       VG_(OSetGen_Insert)(oset, q);
379       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
380    }
381 
382    /* Record *_barrier_wait() call context. */
383    q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
384 
385    /*
386     * Store a pointer to the latest segment of the current thread in the
387     * per-thread data structure.
388     */
389    DRD_(thread_get_latest_segment)(&q->sg, tid);
390 
391    /*
392     * If the same number of threads as the barrier count indicates have
393     * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
394     * reset the p->pre_waiters_left counter.
395     */
396    if (--p->pre_waiters_left <= 0)
397    {
398       p->pre_iteration++;
399       p->pre_waiters_left = p->count;
400    }
401 }
402 
403 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_post_wait)404 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
405                              const BarrierT barrier_type, const Bool waited,
406                              const Bool serializing)
407 {
408    struct barrier_info* p;
409    const UWord word_tid = tid;
410    struct barrier_thread_info* q;
411    struct barrier_thread_info* r;
412    OSet* oset;
413 
414    p = DRD_(barrier_get)(barrier);
415 
416    if (s_trace_barrier)
417       DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
418                       tid, p ? barrier_get_typename(p) : "(?)",
419                       barrier, p ? p->post_iteration : -1,
420                       serializing ? " (serializing)" : "");
421 
422    /*
423     * If p == 0, this means that the barrier has been destroyed after
424     * *_barrier_wait() returned and before this function was called. Just
425     * return in that case -- race conditions between *_barrier_wait()
426     * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
427     */
428    if (p == 0)
429       return;
430 
431    /* If the *_barrier_wait() call returned an error code, exit. */
432    if (! waited)
433       return;
434 
435    oset = p->oset[p->post_iteration & 1];
436    q = VG_(OSetGen_Lookup)(oset, &word_tid);
437    if (p->pre_iteration - p->post_iteration > 1) {
438       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
439       VG_(maybe_record_error)(VG_(get_running_tid)(),
440                               BarrierErr,
441                               VG_(get_IP)(VG_(get_running_tid)()),
442                               "Number of concurrent pthread_barrier_wait()"
443                               " calls exceeds the barrier count",
444                               &bei);
445    } else if (q == NULL) {
446       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
447       VG_(maybe_record_error)(VG_(get_running_tid)(),
448                               BarrierErr,
449                               VG_(get_IP)(VG_(get_running_tid)()),
450                               "Error in barrier implementation"
451                               " -- barrier_wait() started before"
452                               " barrier_destroy() and finished after"
453                               " barrier_destroy()",
454                               &bei);
455    }
456    if (q == NULL) {
457       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
458       DRD_(barrier_thread_initialize)(q, tid);
459       VG_(OSetGen_Insert)(oset, q);
460       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
461       DRD_(thread_get_latest_segment)(&q->sg, tid);
462    }
463 
464    /* Create a new segment and store a pointer to that segment. */
465    DRD_(thread_new_segment)(tid);
466    DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
467    s_barrier_segment_creation_count++;
468 
469    /*
470     * Combine all vector clocks that were stored in the pre_barrier_wait
471     * wrapper with the vector clock of the current thread.
472     */
473    {
474       VectorClock old_vc;
475 
476       DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc);
477       VG_(OSetGen_ResetIter)(oset);
478       for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
479       {
480          if (r != q)
481          {
482             tl_assert(r->sg);
483             DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc,
484                              &r->sg->vc);
485          }
486       }
487       DRD_(thread_update_conflict_set)(tid, &old_vc);
488       DRD_(vc_cleanup)(&old_vc);
489    }
490 
491    /*
492     * If the same number of threads as the barrier count indicates have
493     * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
494     * reset the p->post_waiters_left counter.
495     */
496    if (--p->post_waiters_left <= 0)
497    {
498       p->post_iteration++;
499       p->post_waiters_left = p->count;
500    }
501 }
502 
503 /** Called when thread tid stops to exist. */
barrier_delete_thread(struct barrier_info * const p,const DrdThreadId tid)504 static void barrier_delete_thread(struct barrier_info* const p,
505                                   const DrdThreadId tid)
506 {
507    struct barrier_thread_info* q;
508    const UWord word_tid = tid;
509    int i;
510 
511    for (i = 0; i < 2; i++) {
512       q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
513       if (q)
514          q->thread_finished = True;
515    }
516 }
517 
518 /**
519  * Report that *_barrier_destroy() has been called but that this call was
520  * not synchronized with the last *_barrier_wait() call on the same barrier.
521  *
522  * This topic has been discussed extensively on comp.programming.threads
523  * (February 3, 2009). See also
524  * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
525  */
526 static
barrier_report_wait_delete_race(const struct barrier_info * const p,const struct barrier_thread_info * const q)527 void barrier_report_wait_delete_race(const struct barrier_info* const p,
528                                      const struct barrier_thread_info* const q)
529 {
530    tl_assert(p);
531    tl_assert(q);
532 
533    {
534       BarrierErrInfo bei
535          = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
536       VG_(maybe_record_error)(VG_(get_running_tid)(),
537                               BarrierErr,
538                               VG_(get_IP)(VG_(get_running_tid)()),
539                               "Destruction of barrier not synchronized with"
540                               " barrier wait call",
541                               &bei);
542    }
543 }
544 
barrier_get_typename(struct barrier_info * const p)545 static const char* barrier_get_typename(struct barrier_info* const p)
546 {
547    tl_assert(p);
548 
549    return barrier_type_name(p->barrier_type);
550 }
551 
barrier_type_name(const BarrierT bt)552 static const char* barrier_type_name(const BarrierT bt)
553 {
554    switch (bt)
555    {
556    case pthread_barrier:
557       return "pthread barrier";
558    case gomp_barrier:
559       return "gomp barrier";
560    }
561    return "?";
562 }
563 
DRD_(get_barrier_segment_creation_count)564 ULong DRD_(get_barrier_segment_creation_count)(void)
565 {
566    return s_barrier_segment_creation_count;
567 }
568