• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /** Compute the matrix inverse via Gauss-Jordan elimination.
2  *  This program uses OpenMP separate computation steps but no
3  *  mutexes. It is an example of a race-free program on which no data races
4  *  are reported by the happens-before algorithm (drd), but a lot of data races
5  *  (all false positives) are reported by the Eraser-algorithm (helgrind).
6  */
7 
8 
9 #define _GNU_SOURCE
10 
11 /***********************/
12 /* Include directives. */
13 /***********************/
14 
15 #include <assert.h>
16 #include <math.h>
17 #include <omp.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <unistd.h>  // getopt()
21 
22 
23 /*********************/
24 /* Type definitions. */
25 /*********************/
26 
27 typedef double elem_t;
28 
29 
30 /********************/
31 /* Local variables. */
32 /********************/
33 
34 static int s_trigger_race;
35 
36 
37 /*************************/
38 /* Function definitions. */
39 /*************************/
40 
41 /** Allocate memory for a matrix with the specified number of rows and
42  *  columns.
43  */
new_matrix(const int rows,const int cols)44 static elem_t* new_matrix(const int rows, const int cols)
45 {
46   assert(rows > 0);
47   assert(cols > 0);
48   return malloc(rows * cols * sizeof(elem_t));
49 }
50 
51 /** Free the memory that was allocated for a matrix. */
delete_matrix(elem_t * const a)52 static void delete_matrix(elem_t* const a)
53 {
54   free(a);
55 }
56 
57 /** Fill in some numbers in a matrix. */
init_matrix(elem_t * const a,const int rows,const int cols)58 static void init_matrix(elem_t* const a, const int rows, const int cols)
59 {
60   int i, j;
61   for (i = 0; i < rows; i++)
62   {
63     for (j = 0; j < rows; j++)
64     {
65       a[i * cols + j] = 1.0 / (1 + abs(i-j));
66     }
67   }
68 }
69 
70 /** Print all elements of a matrix. */
print_matrix(const char * const label,const elem_t * const a,const int rows,const int cols)71 void print_matrix(const char* const label,
72                   const elem_t* const a, const int rows, const int cols)
73 {
74   int i, j;
75   printf("%s:\n", label);
76   for (i = 0; i < rows; i++)
77   {
78     for (j = 0; j < cols; j++)
79     {
80       printf("%g ", a[i * cols + j]);
81     }
82     printf("\n");
83   }
84 }
85 
86 /** Copy a subset of the elements of a matrix into another matrix. */
copy_matrix(const elem_t * const from,const int from_rows,const int from_cols,const int from_row_first,const int from_row_last,const int from_col_first,const int from_col_last,elem_t * const to,const int to_rows,const int to_cols,const int to_row_first,const int to_row_last,const int to_col_first,const int to_col_last)87 static void copy_matrix(const elem_t* const from,
88                         const int from_rows,
89                         const int from_cols,
90                         const int from_row_first,
91                         const int from_row_last,
92                         const int from_col_first,
93                         const int from_col_last,
94                         elem_t* const to,
95                         const int to_rows,
96                         const int to_cols,
97                         const int to_row_first,
98                         const int to_row_last,
99                         const int to_col_first,
100                         const int to_col_last)
101 {
102   int i, j;
103 
104   assert(from_row_last - from_row_first == to_row_last - to_row_first);
105   assert(from_col_last - from_col_first == to_col_last - to_col_first);
106 
107   for (i = from_row_first; i < from_row_last; i++)
108   {
109     assert(i < from_rows);
110     assert(i - from_row_first + to_row_first < to_rows);
111     for (j = from_col_first; j < from_col_last; j++)
112     {
113       assert(j < from_cols);
114       assert(j - from_col_first + to_col_first < to_cols);
115       to[(i - from_row_first + to_col_first) * to_cols
116          + (j - from_col_first + to_col_first)]
117         = from[i * from_cols + j];
118     }
119   }
120 }
121 
122 /** Compute the matrix product of a1 and a2. */
multiply_matrices(const elem_t * const a1,const int rows1,const int cols1,const elem_t * const a2,const int rows2,const int cols2)123 static elem_t* multiply_matrices(const elem_t* const a1,
124                                  const int rows1,
125                                  const int cols1,
126                                  const elem_t* const a2,
127                                  const int rows2,
128                                  const int cols2)
129 {
130   int i, j, k;
131   elem_t* prod;
132 
133   assert(cols1 == rows2);
134 
135   prod = new_matrix(rows1, cols2);
136   for (i = 0; i < rows1; i++)
137   {
138     for (j = 0; j < cols2; j++)
139     {
140       prod[i * cols2 + j] = 0;
141       for (k = 0; k < cols1; k++)
142       {
143         prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
144       }
145     }
146   }
147   return prod;
148 }
149 
150 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
151  *  at row r0 and up to but not including row r1. It is assumed that as many
152  *  threads execute this function concurrently as the count barrier p->b was
153  *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
154  *  has at least as many columns as rows, the result of this algorithm is that
155  *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
156  * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
157  */
gj(elem_t * const a,const int rows,const int cols)158 static void gj(elem_t* const a, const int rows, const int cols)
159 {
160   int i, j, k;
161 
162   for (i = 0; i < rows; i++)
163   {
164     {
165       // Pivoting.
166       j = i;
167       for (k = i + 1; k < rows; k++)
168       {
169         if (a[k * cols + i] > a[j * cols + i])
170         {
171           j = k;
172         }
173       }
174       if (j != i)
175       {
176         for (k = 0; k < cols; k++)
177         {
178           const elem_t t = a[i * cols + k];
179           a[i * cols + k] = a[j * cols + k];
180           a[j * cols + k] = t;
181         }
182       }
183       // Normalize row i.
184       if (a[i * cols + i] != 0)
185       {
186         for (k = cols - 1; k >= 0; k--)
187         {
188           a[i * cols + k] /= a[i * cols + i];
189         }
190       }
191     }
192 
193     // Reduce all rows j != i.
194 
195     if (s_trigger_race)
196     {
197 #     pragma omp parallel for private(j)
198       for (j = 0; j < rows; j++)
199       {
200         if (i != j)
201         {
202           const elem_t factor = a[j * cols + i];
203           for (k = 0; k < cols; k++)
204           {
205             a[j * cols + k] -= a[i * cols + k] * factor;
206           }
207         }
208       }
209     }
210     else
211     {
212 #     pragma omp parallel for private(j, k)
213       for (j = 0; j < rows; j++)
214       {
215         if (i != j)
216         {
217           const elem_t factor = a[j * cols + i];
218           for (k = 0; k < cols; k++)
219           {
220             a[j * cols + k] -= a[i * cols + k] * factor;
221           }
222         }
223       }
224     }
225   }
226 }
227 
228 /** Matrix inversion via the Gauss-Jordan algorithm. */
invert_matrix(const elem_t * const a,const int n)229 static elem_t* invert_matrix(const elem_t* const a, const int n)
230 {
231   int i, j;
232   elem_t* const inv = new_matrix(n, n);
233   elem_t* const tmp = new_matrix(n, 2*n);
234   copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
235   for (i = 0; i < n; i++)
236     for (j = 0; j < n; j++)
237       tmp[i * 2 * n + n + j] = (i == j);
238   gj(tmp, n, 2*n);
239   copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
240   delete_matrix(tmp);
241   return inv;
242 }
243 
244 /** Compute the average square error between the identity matrix and the
245  * product of matrix a with its inverse matrix.
246  */
identity_error(const elem_t * const a,const int n)247 static double identity_error(const elem_t* const a, const int n)
248 {
249   int i, j;
250   elem_t e = 0;
251   for (i = 0; i < n; i++)
252   {
253     for (j = 0; j < n; j++)
254     {
255       const elem_t d = a[i * n + j] - (i == j);
256       e += d * d;
257     }
258   }
259   return sqrt(e / (n * n));
260 }
261 
262 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
263  *  smallest number for which the sum of one and that number is different of
264  *  one. It is assumed that the underlying representation of elem_t uses
265  *  base two.
266  */
epsilon()267 static elem_t epsilon()
268 {
269   elem_t eps;
270   for (eps = 1; 1 + eps != 1; eps /= 2)
271     ;
272   return 2 * eps;
273 }
274 
usage(const char * const exe)275 static void usage(const char* const exe)
276 {
277   printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n"
278          "-h: display this information.\n"
279          "-q: quiet mode -- do not print computed error.\n"
280          "-r: trigger a race condition.\n"
281          "-t<n>: use <n> threads.\n"
282          "<m>: matrix size.\n",
283          exe);
284 }
285 
main(int argc,char ** argv)286 int main(int argc, char** argv)
287 {
288   int matrix_size;
289   int nthread = 1;
290   int silent = 0;
291   int optchar;
292   elem_t *a, *inv, *prod;
293   elem_t eps;
294   double error;
295   double ratio;
296 
297   while ((optchar = getopt(argc, argv, "hqrt:")) != EOF)
298   {
299     switch (optchar)
300     {
301     case 'h': usage(argv[0]); return 1;
302     case 'q': silent = 1; break;
303     case 'r': s_trigger_race = 1; break;
304     case 't': nthread = atoi(optarg); break;
305     default:
306       return 1;
307     }
308   }
309 
310   if (optind + 1 != argc)
311   {
312     fprintf(stderr, "Error: wrong number of arguments.\n");
313     return 1;
314   }
315   matrix_size = atoi(argv[optind]);
316 
317   /* Error checking. */
318   assert(matrix_size >= 1);
319   assert(nthread >= 1);
320 
321   omp_set_num_threads(nthread);
322   omp_set_dynamic(0);
323 
324   eps = epsilon();
325   a = new_matrix(matrix_size, matrix_size);
326   init_matrix(a, matrix_size, matrix_size);
327   inv = invert_matrix(a, matrix_size);
328   prod = multiply_matrices(a, matrix_size, matrix_size,
329                            inv, matrix_size, matrix_size);
330   error = identity_error(prod, matrix_size);
331   ratio = error / (eps * matrix_size);
332   if (! silent)
333   {
334     printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
335            error, eps, ratio);
336   }
337   if (isfinite(ratio) && ratio < 100)
338     printf("Error within bounds.\n");
339   else
340     printf("Error out of bounds.\n");
341   delete_matrix(prod);
342   delete_matrix(inv);
343   delete_matrix(a);
344 
345   return 0;
346 }
347