1 /******************************************************************************
2 *
3 * Copyright © International Business Machines Corp., 2007, 2008
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * NAME
20 * matrix_mult.c
21 *
22 * DESCRIPTION
23 * Compare running sequential matrix multiplication routines
24 * to running them in parallel to judge mutliprocessor
25 * performance
26 *
27 * USAGE:
28 * Use run_auto.sh script in current directory to build and run test.
29 *
30 * AUTHOR
31 * Darren Hart <dvhltc@us.ibm.com>
32 *
33 * HISTORY
34 * 2007-Mar-09: Initial version by Darren Hart <dvhltc@us.ibm.com>
35 * 2008-Feb-26: Closely emulate jvm Dinakar Guniguntala <dino@in.ibm.com>
36 *
37 *****************************************************************************/
38
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <math.h>
42 #include <librttest.h>
43 #include <libstats.h>
44
45 #define MAX_CPUS 8192
46 #define PRIO 43
47 #define MATRIX_SIZE 100
48 #define DEF_OPS 8 /* the higher the number, the more CPU intensive */
49 /* (and therefore SMP performance goes up) */
50 #define PASS_CRITERIA 0.75 /* Avg concurrent time * pass criteria < avg seq time - */
51 /* for every addition of a cpu */
52 #define ITERATIONS 128
53 #define HIST_BUCKETS 100
54
55 #define THREAD_WAIT 1
56 #define THREAD_WORK 2
57 #define THREAD_DONE 3
58
59 #define THREAD_SLEEP 1 * NS_PER_US
60
61 static int ops = DEF_OPS;
62 static int numcpus;
63 static float criteria;
64 static int *tids;
65 static int online_cpu_id = -1;
66 static int iterations = ITERATIONS;
67 static int iterations_percpu;
68
69 stats_container_t sdat, cdat, *curdat;
70 stats_container_t shist, chist;
71 static pthread_barrier_t mult_start;
72 static pthread_mutex_t mutex_cpu;
73
usage(void)74 void usage(void)
75 {
76 rt_help();
77 printf("matrix_mult specific options:\n");
78 printf
79 (" -l# #: number of multiplications per iteration (load)\n");
80 printf(" -i# #: number of iterations\n");
81 }
82
parse_args(int c,char * v)83 int parse_args(int c, char *v)
84 {
85 int handled = 1;
86 switch (c) {
87 case 'i':
88 iterations = atoi(v);
89 break;
90 case 'l':
91 ops = atoi(v);
92 break;
93 case 'h':
94 usage();
95 exit(0);
96 default:
97 handled = 0;
98 break;
99 }
100 return handled;
101 }
102
matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],double B[MATRIX_SIZE][MATRIX_SIZE])103 void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
104 double B[MATRIX_SIZE][MATRIX_SIZE])
105 {
106 int i, j;
107 for (i = 0; i < MATRIX_SIZE; i++) {
108 for (j = 0; j < MATRIX_SIZE; j++) {
109 A[i][j] = (double)(i * j);
110 B[i][j] = (double)((i * j) % 10);
111 }
112 }
113 }
114
matrix_mult(int m_size)115 void matrix_mult(int m_size)
116 {
117 double A[m_size][m_size];
118 double B[m_size][m_size];
119 double C[m_size][m_size];
120 int i, j, k;
121
122 matrix_init(A, B);
123 for (i = 0; i < m_size; i++) {
124 int i_m = m_size - i;
125 for (j = 0; j < m_size; j++) {
126 double sum = A[i_m][j] * B[j][i];
127 for (k = 0; k < m_size; k++)
128 sum += A[i_m][k] * B[k][j];
129 C[i][j] = sum;
130 }
131 }
132 }
133
matrix_mult_record(int m_size,int index)134 void matrix_mult_record(int m_size, int index)
135 {
136 nsec_t start, end, delta;
137 int i;
138
139 start = rt_gettime();
140 for (i = 0; i < ops; i++)
141 matrix_mult(MATRIX_SIZE);
142 end = rt_gettime();
143 delta = (long)((end - start) / NS_PER_US);
144 curdat->records[index].x = index;
145 curdat->records[index].y = delta;
146 }
147
set_affinity(void)148 int set_affinity(void)
149 {
150 cpu_set_t mask;
151 int cpuid;
152
153 pthread_mutex_lock(&mutex_cpu);
154 do {
155 ++online_cpu_id;
156 CPU_ZERO(&mask);
157 CPU_SET(online_cpu_id, &mask);
158
159 if (!sched_setaffinity(0, sizeof(mask), &mask)) {
160 cpuid = online_cpu_id; /* Save this value before unlocking mutex */
161 pthread_mutex_unlock(&mutex_cpu);
162 return cpuid;
163 }
164 } while (online_cpu_id < MAX_CPUS);
165 pthread_mutex_unlock(&mutex_cpu);
166 return -1;
167 }
168
concurrent_thread(void * thread)169 void *concurrent_thread(void *thread)
170 {
171 struct thread *t = (struct thread *)thread;
172 int thread_id = (intptr_t) t->id;
173 int cpuid;
174 int i;
175 int index;
176
177 cpuid = set_affinity();
178 if (cpuid == -1) {
179 fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id);
180 exit(1);
181 }
182
183 index = iterations_percpu * thread_id; /* To avoid stats overlapping */
184 pthread_barrier_wait(&mult_start);
185 for (i = 0; i < iterations_percpu; i++)
186 matrix_mult_record(MATRIX_SIZE, index++);
187
188 return NULL;
189 }
190
main_thread(void)191 void main_thread(void)
192 {
193 int ret, i, j;
194 nsec_t start, end;
195 long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
196 float savg, cavg;
197 int cpuid;
198
199 if (stats_container_init(&sdat, iterations) ||
200 stats_container_init(&shist, HIST_BUCKETS) ||
201 stats_container_init(&cdat, iterations) ||
202 stats_container_init(&chist, HIST_BUCKETS)
203 ) {
204 fprintf(stderr, "Cannot init stats container\n");
205 exit(1);
206 }
207
208 tids = malloc(sizeof(int) * numcpus);
209 if (!tids) {
210 perror("malloc");
211 exit(1);
212 }
213 memset(tids, 0, numcpus);
214
215 cpuid = set_affinity();
216 if (cpuid == -1) {
217 fprintf(stderr, "Main thread: Can't set affinity.\n");
218 exit(1);
219 }
220
221 /* run matrix mult operation sequentially */
222 curdat = &sdat;
223 curdat->index = iterations - 1;
224 printf("\nRunning sequential operations\n");
225 start = rt_gettime();
226 for (i = 0; i < iterations; i++)
227 matrix_mult_record(MATRIX_SIZE, i);
228 end = rt_gettime();
229 delta = (long)((end - start) / NS_PER_US);
230
231 savg = delta / iterations; /* don't use the stats record, use the total time recorded */
232 smin = stats_min(&sdat);
233 smax = stats_max(&sdat);
234
235 printf("Min: %ld us\n", smin);
236 printf("Max: %ld us\n", smax);
237 printf("Avg: %.4f us\n", savg);
238 printf("StdDev: %.4f us\n", stats_stddev(&sdat));
239
240 if (stats_hist(&shist, &sdat) ||
241 stats_container_save("sequential",
242 "Matrix Multiplication Sequential Execution Runtime Scatter Plot",
243 "Iteration", "Runtime (us)", &sdat, "points")
244 || stats_container_save("sequential_hist",
245 "Matrix Multiplicatoin Sequential Execution Runtime Histogram",
246 "Runtime (us)", "Samples", &shist, "steps")
247 ) {
248 fprintf(stderr,
249 "Warning: could not save sequential mults stats\n");
250 }
251
252 pthread_barrier_init(&mult_start, NULL, numcpus + 1);
253 set_priority(PRIO);
254 curdat = &cdat;
255 curdat->index = iterations - 1;
256 online_cpu_id = -1; /* Redispatch cpus */
257 /* Create numcpus-1 concurrent threads */
258 for (j = 0; j < numcpus; j++) {
259 tids[j] = create_fifo_thread(concurrent_thread, NULL, PRIO);
260 if (tids[j] == -1) {
261 printf
262 ("Thread creation failed (max threads exceeded?)\n");
263 exit(1);
264 }
265 }
266
267 /* run matrix mult operation concurrently */
268 printf("\nRunning concurrent operations\n");
269 pthread_barrier_wait(&mult_start);
270 start = rt_gettime();
271 join_threads();
272 end = rt_gettime();
273
274 delta = (long)((end - start) / NS_PER_US);
275
276 cavg = delta / iterations; /* don't use the stats record, use the total time recorded */
277 cmin = stats_min(&cdat);
278 cmax = stats_max(&cdat);
279
280 printf("Min: %ld us\n", cmin);
281 printf("Max: %ld us\n", cmax);
282 printf("Avg: %.4f us\n", cavg);
283 printf("StdDev: %.4f us\n", stats_stddev(&cdat));
284
285 if (stats_hist(&chist, &cdat) ||
286 stats_container_save("concurrent",
287 "Matrix Multiplication Concurrent Execution Runtime Scatter Plot",
288 "Iteration", "Runtime (us)", &cdat, "points")
289 || stats_container_save("concurrent_hist",
290 "Matrix Multiplication Concurrent Execution Runtime Histogram",
291 "Iteration", "Runtime (us)", &chist,
292 "steps")
293 ) {
294 fprintf(stderr,
295 "Warning: could not save concurrent mults stats\n");
296 }
297
298 printf("\nConcurrent Multipliers:\n");
299 printf("Min: %.4f\n", (float)smin / cmin);
300 printf("Max: %.4f\n", (float)smax / cmax);
301 printf("Avg: %.4f\n", (float)savg / cavg);
302
303 ret = 1;
304 if (savg > (cavg * criteria))
305 ret = 0;
306 printf
307 ("\nCriteria: %.2f * average concurrent time < average sequential time\n",
308 criteria);
309 printf("Result: %s\n", ret ? "FAIL" : "PASS");
310
311 return;
312 }
313
main(int argc,char * argv[])314 int main(int argc, char *argv[])
315 {
316 setup();
317 pass_criteria = PASS_CRITERIA;
318 rt_init("l:i:h", parse_args, argc, argv);
319 numcpus = sysconf(_SC_NPROCESSORS_ONLN);
320 /* the minimum avg concurrent multiplier to pass */
321 criteria = pass_criteria * numcpus;
322 int new_iterations;
323
324 if (iterations <= 0) {
325 fprintf(stderr, "iterations must be greater than zero\n");
326 exit(1);
327 }
328
329 printf("\n---------------------------------------\n");
330 printf("Matrix Multiplication (SMP Performance)\n");
331 printf("---------------------------------------\n\n");
332
333 /* Line below rounds up iterations to a multiple of numcpus.
334 * Without this, having iterations not a mutiple of numcpus causes
335 * stats to segfault (overflow stats array).
336 */
337 new_iterations = (int)((iterations + numcpus - 1) / numcpus) * numcpus;
338 if (new_iterations != iterations)
339 printf
340 ("Rounding up iterations value to nearest multiple of total online CPUs\n");
341
342 iterations = new_iterations;
343 iterations_percpu = iterations / numcpus;
344
345 printf("Running %d iterations\n", iterations);
346 printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE);
347 printf("Calculations per iteration: %d\n", ops);
348 printf("Number of CPUs: %u\n", numcpus);
349
350 set_priority(PRIO);
351 main_thread();
352
353 return 0;
354 }
355