1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) International Business Machines Corp., 2007, 2008
4 *
5 * Authors: Darren Hart <dvhltc@us.ibm.com>
6 * Dinakar Guniguntala <dino@in.ibm.com>
7 */
8 /*\
9 * [Description]
10 *
11 * Compare running sequential matrix multiplication routines
12 * to running them in parallel to judge multiprocessor
13 * performance
14 */
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <math.h>
19 #include <librttest.h>
20 #include <libstats.h>
21
#define MAX_CPUS 8192		/* upper bound on CPU ids probed by set_affinity() */
#define PRIO 43			/* SCHED_FIFO priority for all measurement threads */
#define MATRIX_SIZE 100		/* square matrix dimension */
#define DEF_OPS 8		/* the higher the number, the more CPU intensive */
				/* (and therefore SMP performance goes up) */
#define PASS_CRITERIA 0.75	/* Avg concurrent time * pass criteria < avg seq time - */
				/* for every addition of a cpu */
#define ITERATIONS 128		/* default iteration count (see -i option) */
#define HIST_BUCKETS 100	/* buckets for the runtime histograms */

/* Thread state constants; not referenced elsewhere in this file. */
#define THREAD_WAIT 1
#define THREAD_WORK 2
#define THREAD_DONE 3

/* Parenthesized so the macro expands safely inside larger expressions. */
#define THREAD_SLEEP (1 * NS_PER_US)
37
static int ops = DEF_OPS;	/* matrix multiplications per recorded iteration (-l) */
static int numcpus;		/* online CPU count, set in main() */
static float criteria;		/* pass threshold: pass_criteria * numcpus */
static int *tids;		/* thread ids returned by create_fifo_thread() */
static int online_cpu_id = -1;	/* last CPU id handed out by set_affinity() */
static int iterations = ITERATIONS;	/* total iterations (-i), rounded to a multiple of numcpus */
static int iterations_percpu;	/* iterations / numcpus; slice size per worker thread */

/* Stats containers: s* = sequential run, c* = concurrent run.
 * curdat points at whichever container matrix_mult_record() should fill. */
stats_container_t sdat, cdat, *curdat;
stats_container_t shist, chist;
static pthread_barrier_t mult_start;	/* releases all workers + main simultaneously */
49
/* Per-thread working set: C = A x B (plus an extra product term; see
 * matrix_mult()). Each worker gets its own instance to avoid sharing. */
struct matrices {
	double A[MATRIX_SIZE][MATRIX_SIZE];
	double B[MATRIX_SIZE][MATRIX_SIZE];
	double C[MATRIX_SIZE][MATRIX_SIZE];
};
55
/* Print the common librttest help plus the options specific to this test. */
static void usage(void)
{
	rt_help();
	printf("matrix_mult specific options:\n");
	printf
	    ("        -l#     #: number of multiplications per iteration (load)\n");
	printf("        -i#     #: number of iterations\n");
}
64
parse_args(int c,char * v)65 static int parse_args(int c, char *v)
66 {
67 int handled = 1;
68 switch (c) {
69 case 'i':
70 iterations = atoi(v);
71 break;
72 case 'l':
73 ops = atoi(v);
74 break;
75 case 'h':
76 usage();
77 exit(0);
78 default:
79 handled = 0;
80 break;
81 }
82 return handled;
83 }
84
matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],double B[MATRIX_SIZE][MATRIX_SIZE])85 static void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
86 double B[MATRIX_SIZE][MATRIX_SIZE])
87 {
88 int i, j;
89 for (i = 0; i < MATRIX_SIZE; i++) {
90 for (j = 0; j < MATRIX_SIZE; j++) {
91 A[i][j] = (double)(i * j);
92 B[i][j] = (double)((i * j) % 10);
93 }
94 }
95 }
96
matrix_mult(struct matrices * matrices)97 static void matrix_mult(struct matrices *matrices)
98 {
99 int i, j, k;
100
101 matrix_init(matrices->A, matrices->B);
102 for (i = 0; i < MATRIX_SIZE; i++) {
103 int i_m = MATRIX_SIZE - i;
104 for (j = 0; j < MATRIX_SIZE; j++) {
105 double sum = matrices->A[i_m][j] * matrices->B[j][i];
106 for (k = 0; k < MATRIX_SIZE; k++)
107 sum += matrices->A[i_m][k] * matrices->B[k][j];
108 matrices->C[i][j] = sum;
109 }
110 }
111 }
112
matrix_mult_record(struct matrices * matrices,int index)113 static void matrix_mult_record(struct matrices *matrices, int index)
114 {
115 nsec_t start, end, delta;
116 int i;
117
118 start = rt_gettime();
119 for (i = 0; i < ops; i++)
120 matrix_mult(matrices);
121 end = rt_gettime();
122 delta = (long)((end - start) / NS_PER_US);
123 curdat->records[index].x = index;
124 curdat->records[index].y = delta;
125 }
126
set_affinity(void)127 static int set_affinity(void)
128 {
129 static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;
130 cpu_set_t mask;
131 int cpuid;
132
133 pthread_mutex_lock(&mutex_cpu);
134 do {
135 ++online_cpu_id;
136 CPU_ZERO(&mask);
137 CPU_SET(online_cpu_id, &mask);
138
139 if (!sched_setaffinity(0, sizeof(mask), &mask)) {
140 cpuid = online_cpu_id; /* Save this value before unlocking mutex */
141 pthread_mutex_unlock(&mutex_cpu);
142 return cpuid;
143 }
144 } while (online_cpu_id < MAX_CPUS);
145 pthread_mutex_unlock(&mutex_cpu);
146 return -1;
147 }
148
concurrent_thread(void * thread)149 static void *concurrent_thread(void *thread)
150 {
151 struct thread *t = (struct thread *)thread;
152 struct matrices *matrices = (struct matrices *) t->arg;
153 int thread_id = (intptr_t) t->id;
154 int cpuid;
155 int i;
156 int index;
157
158 cpuid = set_affinity();
159 if (cpuid == -1) {
160 fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id);
161 exit(1);
162 }
163
164 index = iterations_percpu * thread_id; /* To avoid stats overlapping */
165 pthread_barrier_wait(&mult_start);
166 for (i = 0; i < iterations_percpu; i++)
167 matrix_mult_record(matrices, index++);
168
169 return NULL;
170 }
171
main_thread(void)172 static int main_thread(void)
173 {
174 int ret, i, j;
175 nsec_t start, end;
176 long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
177 float savg, cavg;
178 int cpuid;
179 struct matrices *matrices[numcpus];
180
181 for (i = 0; i < numcpus; ++i)
182 matrices[i] = malloc(sizeof(struct matrices));
183
184 if (stats_container_init(&sdat, iterations) ||
185 stats_container_init(&shist, HIST_BUCKETS) ||
186 stats_container_init(&cdat, iterations) ||
187 stats_container_init(&chist, HIST_BUCKETS)
188 ) {
189 fprintf(stderr, "Cannot init stats container\n");
190 exit(1);
191 }
192
193 tids = calloc(numcpus, sizeof(int));
194 if (!tids) {
195 perror("malloc");
196 exit(1);
197 }
198
199 cpuid = set_affinity();
200 if (cpuid == -1) {
201 fprintf(stderr, "Main thread: Can't set affinity.\n");
202 exit(1);
203 }
204
205 /* run matrix mult operation sequentially */
206 curdat = &sdat;
207 curdat->index = iterations - 1;
208 printf("\nRunning sequential operations\n");
209 start = rt_gettime();
210 for (i = 0; i < iterations; i++)
211 matrix_mult_record(matrices[0], i);
212 end = rt_gettime();
213 delta = (long)((end - start) / NS_PER_US);
214
215 savg = delta / iterations; /* don't use the stats record, use the total time recorded */
216 smin = stats_min(&sdat);
217 smax = stats_max(&sdat);
218
219 printf("Min: %ld us\n", smin);
220 printf("Max: %ld us\n", smax);
221 printf("Avg: %.4f us\n", savg);
222 printf("StdDev: %.4f us\n", stats_stddev(&sdat));
223
224 if (stats_hist(&shist, &sdat) ||
225 stats_container_save("sequential",
226 "Matrix Multiplication Sequential Execution Runtime Scatter Plot",
227 "Iteration", "Runtime (us)", &sdat, "points")
228 || stats_container_save("sequential_hist",
229 "Matrix Multiplicatoin Sequential Execution Runtime Histogram",
230 "Runtime (us)", "Samples", &shist, "steps")
231 ) {
232 fprintf(stderr,
233 "Warning: could not save sequential mults stats\n");
234 }
235
236 pthread_barrier_init(&mult_start, NULL, numcpus + 1);
237 set_priority(PRIO);
238 curdat = &cdat;
239 curdat->index = iterations - 1;
240 online_cpu_id = -1; /* Redispatch cpus */
241 /* Create numcpus-1 concurrent threads */
242 for (j = 0; j < numcpus; j++) {
243 tids[j] = create_fifo_thread(concurrent_thread, matrices[j], PRIO);
244 if (tids[j] == -1) {
245 printf
246 ("Thread creation failed (max threads exceeded?)\n");
247 exit(1);
248 }
249 }
250
251 /* run matrix mult operation concurrently */
252 printf("\nRunning concurrent operations\n");
253 pthread_barrier_wait(&mult_start);
254 start = rt_gettime();
255 join_threads();
256 end = rt_gettime();
257
258 delta = (long)((end - start) / NS_PER_US);
259
260 cavg = delta / iterations; /* don't use the stats record, use the total time recorded */
261 cmin = stats_min(&cdat);
262 cmax = stats_max(&cdat);
263
264 printf("Min: %ld us\n", cmin);
265 printf("Max: %ld us\n", cmax);
266 printf("Avg: %.4f us\n", cavg);
267 printf("StdDev: %.4f us\n", stats_stddev(&cdat));
268
269 if (stats_hist(&chist, &cdat) ||
270 stats_container_save("concurrent",
271 "Matrix Multiplication Concurrent Execution Runtime Scatter Plot",
272 "Iteration", "Runtime (us)", &cdat, "points")
273 || stats_container_save("concurrent_hist",
274 "Matrix Multiplication Concurrent Execution Runtime Histogram",
275 "Iteration", "Runtime (us)", &chist,
276 "steps")
277 ) {
278 fprintf(stderr,
279 "Warning: could not save concurrent mults stats\n");
280 }
281
282 printf("\nConcurrent Multipliers:\n");
283 printf("Min: %.4f\n", (float)smin / cmin);
284 printf("Max: %.4f\n", (float)smax / cmax);
285 printf("Avg: %.4f\n", (float)savg / cavg);
286
287 ret = 1;
288 if (savg > (cavg * criteria))
289 ret = 0;
290 printf
291 ("\nCriteria: %.2f * average concurrent time < average sequential time\n",
292 criteria);
293 printf("Result: %s\n", ret ? "FAIL" : "PASS");
294
295 for (i = 0; i < numcpus; i++)
296 free(matrices[i]);
297
298 return ret;
299 }
300
main(int argc,char * argv[])301 int main(int argc, char *argv[])
302 {
303 setup();
304 pass_criteria = PASS_CRITERIA;
305 rt_init("l:i:h", parse_args, argc, argv);
306 numcpus = sysconf(_SC_NPROCESSORS_ONLN);
307 /* the minimum avg concurrent multiplier to pass */
308 criteria = pass_criteria * numcpus;
309 int new_iterations, ret;
310
311 if (iterations <= 0) {
312 fprintf(stderr, "iterations must be greater than zero\n");
313 exit(1);
314 }
315
316 printf("\n---------------------------------------\n");
317 printf("Matrix Multiplication (SMP Performance)\n");
318 printf("---------------------------------------\n\n");
319
320 /* Line below rounds up iterations to a multiple of numcpus.
321 * Without this, having iterations not a mutiple of numcpus causes
322 * stats to segfault (overflow stats array).
323 */
324 new_iterations = (int)((iterations + numcpus - 1) / numcpus) * numcpus;
325 if (new_iterations != iterations)
326 printf
327 ("Rounding up iterations value to nearest multiple of total online CPUs\n");
328
329 iterations = new_iterations;
330 iterations_percpu = iterations / numcpus;
331
332 printf("Running %d iterations\n", iterations);
333 printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE);
334 printf("Calculations per iteration: %d\n", ops);
335 printf("Number of CPUs: %u\n", numcpus);
336
337 set_priority(PRIO);
338 ret = main_thread();
339
340 return ret;
341 }
342