• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /** Compute the matrix inverse via Gauss-Jordan elimination.
2  *  This program uses only barriers to separate computation steps but no
3  *  mutexes. It is an example of a race-free program on which no data races
4  *  are reported by the happens-before algorithm (drd), but a lot of data races
5  *  (all false positives) are reported by the Eraser-algorithm (helgrind).
6  */
7 
8 
9 #define _GNU_SOURCE
10 
11 /***********************/
12 /* Include directives. */
13 /***********************/
14 
15 #include <assert.h>
16 #include <math.h>
17 #include <limits.h>  // PTHREAD_STACK_MIN
18 #include <pthread.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>  // getopt()
22 
23 
24 /*********************/
25 /* Type definitions. */
26 /*********************/
27 
28 typedef double elem_t;
29 
30 struct gj_threadinfo
31 {
32   pthread_barrier_t* b;
33   pthread_t tid;
34   elem_t* a;
35   int rows;
36   int cols;
37   int r0;
38   int r1;
39 };
40 
41 
42 /********************/
43 /* Local variables. */
44 /********************/
45 
46 static int s_nthread = 1;
47 
48 
49 /*************************/
50 /* Function definitions. */
51 /*************************/
52 
53 /** Allocate memory for a matrix with the specified number of rows and
54  *  columns.
55  */
new_matrix(const int rows,const int cols)56 static elem_t* new_matrix(const int rows, const int cols)
57 {
58   assert(rows > 0);
59   assert(cols > 0);
60   return malloc(rows * cols * sizeof(elem_t));
61 }
62 
63 /** Free the memory that was allocated for a matrix. */
delete_matrix(elem_t * const a)64 static void delete_matrix(elem_t* const a)
65 {
66   free(a);
67 }
68 
69 /** Fill in some numbers in a matrix. */
init_matrix(elem_t * const a,const int rows,const int cols)70 static void init_matrix(elem_t* const a, const int rows, const int cols)
71 {
72   int i, j;
73   for (i = 0; i < rows; i++)
74   {
75     for (j = 0; j < rows; j++)
76     {
77       a[i * cols + j] = 1.0 / (1 + abs(i-j));
78     }
79   }
80 }
81 
82 /** Print all elements of a matrix. */
print_matrix(const char * const label,const elem_t * const a,const int rows,const int cols)83 void print_matrix(const char* const label,
84                   const elem_t* const a, const int rows, const int cols)
85 {
86   int i, j;
87   printf("%s:\n", label);
88   for (i = 0; i < rows; i++)
89   {
90     for (j = 0; j < cols; j++)
91     {
92       printf("%g ", a[i * cols + j]);
93     }
94     printf("\n");
95   }
96 }
97 
98 /** Copy a subset of the elements of a matrix into another matrix. */
copy_matrix(const elem_t * const from,const int from_rows,const int from_cols,const int from_row_first,const int from_row_last,const int from_col_first,const int from_col_last,elem_t * const to,const int to_rows,const int to_cols,const int to_row_first,const int to_row_last,const int to_col_first,const int to_col_last)99 static void copy_matrix(const elem_t* const from,
100                         const int from_rows,
101                         const int from_cols,
102                         const int from_row_first,
103                         const int from_row_last,
104                         const int from_col_first,
105                         const int from_col_last,
106                         elem_t* const to,
107                         const int to_rows,
108                         const int to_cols,
109                         const int to_row_first,
110                         const int to_row_last,
111                         const int to_col_first,
112                         const int to_col_last)
113 {
114   int i, j;
115 
116   assert(from_row_last - from_row_first == to_row_last - to_row_first);
117   assert(from_col_last - from_col_first == to_col_last - to_col_first);
118 
119   for (i = from_row_first; i < from_row_last; i++)
120   {
121     assert(i < from_rows);
122     assert(i - from_row_first + to_row_first < to_rows);
123     for (j = from_col_first; j < from_col_last; j++)
124     {
125       assert(j < from_cols);
126       assert(j - from_col_first + to_col_first < to_cols);
127       to[(i - from_row_first + to_col_first) * to_cols
128          + (j - from_col_first + to_col_first)]
129         = from[i * from_cols + j];
130     }
131   }
132 }
133 
134 /** Compute the matrix product of a1 and a2. */
multiply_matrices(const elem_t * const a1,const int rows1,const int cols1,const elem_t * const a2,const int rows2,const int cols2)135 static elem_t* multiply_matrices(const elem_t* const a1,
136                                  const int rows1,
137                                  const int cols1,
138                                  const elem_t* const a2,
139                                  const int rows2,
140                                  const int cols2)
141 {
142   int i, j, k;
143   elem_t* prod;
144 
145   assert(cols1 == rows2);
146 
147   prod = new_matrix(rows1, cols2);
148   for (i = 0; i < rows1; i++)
149   {
150     for (j = 0; j < cols2; j++)
151     {
152       prod[i * cols2 + j] = 0;
153       for (k = 0; k < cols1; k++)
154       {
155         prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
156       }
157     }
158   }
159   return prod;
160 }
161 
162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
163  *  at row r0 and up to but not including row r1. It is assumed that as many
164  *  threads execute this function concurrently as the count barrier p->b was
165  *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
166  *  has at least as many columns as rows, the result of this algorithm is that
167  *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
168  * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
169  */
gj_threadfunc(struct gj_threadinfo * p)170 static void gj_threadfunc(struct gj_threadinfo* p)
171 {
172   int i, j, k;
173   elem_t* const a = p->a;
174   const int rows = p->rows;
175   const int cols = p->cols;
176   elem_t aii;
177 
178   for (i = 0; i < p->rows; i++)
179   {
180     if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD)
181     {
182       // Pivoting.
183       j = i;
184       for (k = i + 1; k < rows; k++)
185       {
186         if (a[k * cols + i] > a[j * cols + i])
187         {
188           j = k;
189         }
190       }
191       if (j != i)
192       {
193         for (k = 0; k < cols; k++)
194         {
195           const elem_t t = a[i * cols + k];
196           a[i * cols + k] = a[j * cols + k];
197           a[j * cols + k] = t;
198         }
199       }
200       // Normalize row i.
201       aii = a[i * cols + i];
202       if (aii != 0)
203         for (k = i; k < cols; k++)
204           a[i * cols + k] /= aii;
205     }
206     pthread_barrier_wait(p->b);
207     // Reduce all rows j != i.
208     for (j = p->r0; j < p->r1; j++)
209     {
210       if (i != j)
211       {
212         const elem_t factor = a[j * cols + i];
213         for (k = 0; k < cols; k++)
214         {
215           a[j * cols + k] -= a[i * cols + k] * factor;
216         }
217       }
218     }
219   }
220 }
221 
222 /** Multithreaded Gauss-Jordan algorithm. */
gj(elem_t * const a,const int rows,const int cols)223 static void gj(elem_t* const a, const int rows, const int cols)
224 {
225   int i;
226   struct gj_threadinfo* t;
227   pthread_barrier_t b;
228   pthread_attr_t attr;
229   int err;
230 
231   assert(rows <= cols);
232 
233   t = malloc(sizeof(struct gj_threadinfo) * s_nthread);
234 
235   pthread_barrier_init(&b, 0, s_nthread);
236 
237   pthread_attr_init(&attr);
238   err = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 4096);
239   assert(err == 0);
240 
241   for (i = 0; i < s_nthread; i++)
242   {
243     t[i].b = &b;
244     t[i].a = a;
245     t[i].rows = rows;
246     t[i].cols = cols;
247     t[i].r0 = i * rows / s_nthread;
248     t[i].r1 = (i+1) * rows / s_nthread;
249     pthread_create(&t[i].tid, &attr, (void*(*)(void*))gj_threadfunc, &t[i]);
250   }
251 
252   pthread_attr_destroy(&attr);
253 
254   for (i = 0; i < s_nthread; i++)
255   {
256     pthread_join(t[i].tid, 0);
257   }
258 
259   pthread_barrier_destroy(&b);
260 
261   free(t);
262 }
263 
264 /** Matrix inversion via the Gauss-Jordan algorithm. */
invert_matrix(const elem_t * const a,const int n)265 static elem_t* invert_matrix(const elem_t* const a, const int n)
266 {
267   int i, j;
268   elem_t* const inv = new_matrix(n, n);
269   elem_t* const tmp = new_matrix(n, 2*n);
270   copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
271   for (i = 0; i < n; i++)
272     for (j = 0; j < n; j++)
273       tmp[i * 2 * n + n + j] = (i == j);
274   gj(tmp, n, 2*n);
275   copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
276   delete_matrix(tmp);
277   return inv;
278 }
279 
280 /** Compute the average square error between the identity matrix and the
281  * product of matrix a with its inverse matrix.
282  */
identity_error(const elem_t * const a,const int n)283 static double identity_error(const elem_t* const a, const int n)
284 {
285   int i, j;
286   elem_t e = 0;
287   for (i = 0; i < n; i++)
288   {
289     for (j = 0; j < n; j++)
290     {
291       const elem_t d = a[i * n + j] - (i == j);
292       e += d * d;
293     }
294   }
295   return sqrt(e / (n * n));
296 }
297 
298 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
299  *  smallest number for which the sum of one and that number is different of
300  *  one. It is assumed that the underlying representation of elem_t uses
301  *  base two.
302  */
epsilon()303 static elem_t epsilon()
304 {
305   elem_t eps;
306   for (eps = 1; 1 + eps != 1; eps /= 2)
307     ;
308   return 2 * eps;
309 }
310 
main(int argc,char ** argv)311 int main(int argc, char** argv)
312 {
313   int matrix_size;
314   int silent = 0;
315   int optchar;
316   elem_t *a, *inv, *prod;
317   elem_t eps;
318   double error;
319   double ratio;
320 
321   while ((optchar = getopt(argc, argv, "qt:")) != EOF)
322   {
323     switch (optchar)
324     {
325     case 'q': silent = 1; break;
326     case 't': s_nthread = atoi(optarg); break;
327     default:
328       fprintf(stderr, "Error: unknown option '%c'.\n", optchar);
329       return 1;
330     }
331   }
332 
333   if (optind + 1 != argc)
334   {
335     fprintf(stderr, "Error: wrong number of arguments.\n");
336     return 1;
337   }
338   matrix_size = atoi(argv[optind]);
339 
340   /* Error checking. */
341   assert(matrix_size >= 1);
342   assert(s_nthread >= 1);
343 
344   eps = epsilon();
345   a = new_matrix(matrix_size, matrix_size);
346   init_matrix(a, matrix_size, matrix_size);
347   inv = invert_matrix(a, matrix_size);
348   prod = multiply_matrices(a, matrix_size, matrix_size,
349                            inv, matrix_size, matrix_size);
350   error = identity_error(prod, matrix_size);
351   ratio = error / (eps * matrix_size);
352   if (! silent)
353   {
354     printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
355            error, eps, ratio);
356   }
357   if (isfinite(ratio) && ratio < 100)
358     printf("Error within bounds.\n");
359   else
360     printf("Error out of bounds.\n");
361   delete_matrix(prod);
362   delete_matrix(inv);
363   delete_matrix(a);
364 
365   return 0;
366 }
367