• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) International Business Machines  Corp., 2007, 2008
4  *
5  * Authors: Darren Hart <dvhltc@us.ibm.com>
6  *          Dinakar Guniguntala <dino@in.ibm.com>
7  */
8 /*\
9  * [Description]
10  *
11  * Compare running sequential matrix multiplication routines
12  * to running them in parallel to judge multiprocessor
13  * performance
14  */
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <math.h>
19 #include <librttest.h>
20 #include <libstats.h>
21 
#define MAX_CPUS	8192	/* upper bound on the CPU ids probed by set_affinity() */
#define PRIO		43	/* priority handed to set_priority()/create_fifo_thread() */
#define MATRIX_SIZE	100	/* matrices are MATRIX_SIZE x MATRIX_SIZE doubles */
#define DEF_OPS		8	/* the higher the number, the more CPU intensive */
					/* (and therefore SMP performance goes up) */
#define PASS_CRITERIA	0.75	/* Avg concurrent time * pass criteria < avg seq time - */
					/* for every addition of a cpu */
#define ITERATIONS	128	/* default number of recorded iterations */
#define HIST_BUCKETS	100	/* size of the histogram stats containers */

#define THREAD_WAIT	1
#define THREAD_WORK	2
#define THREAD_DONE	3

/*
 * Parenthesized so the expansion behaves as a single operand when it is
 * used inside a larger expression (e.g. as a divisor).
 */
#define THREAD_SLEEP	(1 * NS_PER_US)
37 
38 static int ops = DEF_OPS;
39 static int numcpus;
40 static float criteria;
41 static int *tids;
42 static int online_cpu_id = -1;
43 static int iterations = ITERATIONS;
44 static int iterations_percpu;
45 
46 stats_container_t sdat, cdat, *curdat;
47 stats_container_t shist, chist;
48 static pthread_barrier_t mult_start;
49 
/*
 * Working set for one multiplier: matrix_init() fills A and B, and
 * matrix_mult() stores its result in C.
 */
struct matrices {
	double A[MATRIX_SIZE][MATRIX_SIZE];
	double B[MATRIX_SIZE][MATRIX_SIZE];
	double C[MATRIX_SIZE][MATRIX_SIZE];
};
55 
/*
 * Print the output of rt_help() followed by the options that are
 * specific to this test.
 */
static void usage(void)
{
	rt_help();
	fputs("matrix_mult specific options:\n"
	      "  -l#	   #: number of multiplications per iteration (load)\n"
	      "  -i#	   #: number of iterations\n", stdout);
}
64 
parse_args(int c,char * v)65 static int parse_args(int c, char *v)
66 {
67 	int handled = 1;
68 	switch (c) {
69 	case 'i':
70 		iterations = atoi(v);
71 		break;
72 	case 'l':
73 		ops = atoi(v);
74 		break;
75 	case 'h':
76 		usage();
77 		exit(0);
78 	default:
79 		handled = 0;
80 		break;
81 	}
82 	return handled;
83 }
84 
matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],double B[MATRIX_SIZE][MATRIX_SIZE])85 static void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
86 		 double B[MATRIX_SIZE][MATRIX_SIZE])
87 {
88 	int i, j;
89 	for (i = 0; i < MATRIX_SIZE; i++) {
90 		for (j = 0; j < MATRIX_SIZE; j++) {
91 			A[i][j] = (double)(i * j);
92 			B[i][j] = (double)((i * j) % 10);
93 		}
94 	}
95 }
96 
matrix_mult(struct matrices * matrices)97 static void matrix_mult(struct matrices *matrices)
98 {
99 	int i, j, k;
100 
101 	matrix_init(matrices->A, matrices->B);
102 	for (i = 0; i < MATRIX_SIZE; i++) {
103 		int i_m = MATRIX_SIZE - i;
104 		for (j = 0; j < MATRIX_SIZE; j++) {
105 			double sum = matrices->A[i_m][j] *  matrices->B[j][i];
106 			for (k = 0; k < MATRIX_SIZE; k++)
107 				sum +=  matrices->A[i_m][k] *  matrices->B[k][j];
108 			 matrices->C[i][j] = sum;
109 		}
110 	}
111 }
112 
matrix_mult_record(struct matrices * matrices,int index)113 static void matrix_mult_record(struct matrices *matrices, int index)
114 {
115 	nsec_t start, end, delta;
116 	int i;
117 
118 	start = rt_gettime();
119 	for (i = 0; i < ops; i++)
120 		matrix_mult(matrices);
121 	end = rt_gettime();
122 	delta = (long)((end - start) / NS_PER_US);
123 	curdat->records[index].x = index;
124 	curdat->records[index].y = delta;
125 }
126 
set_affinity(void)127 static int set_affinity(void)
128 {
129 	static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;
130 	cpu_set_t mask;
131 	int cpuid;
132 
133 	pthread_mutex_lock(&mutex_cpu);
134 	do {
135 		++online_cpu_id;
136 		CPU_ZERO(&mask);
137 		CPU_SET(online_cpu_id, &mask);
138 
139 		if (!sched_setaffinity(0, sizeof(mask), &mask)) {
140 			cpuid = online_cpu_id;	/* Save this value before unlocking mutex */
141 			pthread_mutex_unlock(&mutex_cpu);
142 			return cpuid;
143 		}
144 	} while (online_cpu_id < MAX_CPUS);
145 	pthread_mutex_unlock(&mutex_cpu);
146 	return -1;
147 }
148 
concurrent_thread(void * thread)149 static void *concurrent_thread(void *thread)
150 {
151 	struct thread *t = (struct thread *)thread;
152 	struct matrices *matrices = (struct matrices *) t->arg;
153 	int thread_id = (intptr_t) t->id;
154 	int cpuid;
155 	int i;
156 	int index;
157 
158 	cpuid = set_affinity();
159 	if (cpuid == -1) {
160 		fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id);
161 		exit(1);
162 	}
163 
164 	index = iterations_percpu * thread_id;	/* To avoid stats overlapping */
165 	pthread_barrier_wait(&mult_start);
166 	for (i = 0; i < iterations_percpu; i++)
167 		matrix_mult_record(matrices, index++);
168 
169 	return NULL;
170 }
171 
main_thread(void)172 static int main_thread(void)
173 {
174 	int ret, i, j;
175 	nsec_t start, end;
176 	long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
177 	float savg, cavg;
178 	int cpuid;
179 	struct matrices *matrices[numcpus];
180 
181 	for (i = 0; i < numcpus; ++i)
182 		matrices[i] = malloc(sizeof(struct matrices));
183 
184 	if (stats_container_init(&sdat, iterations) ||
185 	    stats_container_init(&shist, HIST_BUCKETS) ||
186 	    stats_container_init(&cdat, iterations) ||
187 	    stats_container_init(&chist, HIST_BUCKETS)
188 	    ) {
189 		fprintf(stderr, "Cannot init stats container\n");
190 		exit(1);
191 	}
192 
193 	tids = calloc(numcpus, sizeof(int));
194 	if (!tids) {
195 		perror("malloc");
196 		exit(1);
197 	}
198 
199 	cpuid = set_affinity();
200 	if (cpuid == -1) {
201 		fprintf(stderr, "Main thread: Can't set affinity.\n");
202 		exit(1);
203 	}
204 
205 	/* run matrix mult operation sequentially */
206 	curdat = &sdat;
207 	curdat->index = iterations - 1;
208 	printf("\nRunning sequential operations\n");
209 	start = rt_gettime();
210 	for (i = 0; i < iterations; i++)
211 		matrix_mult_record(matrices[0], i);
212 	end = rt_gettime();
213 	delta = (long)((end - start) / NS_PER_US);
214 
215 	savg = delta / iterations;	/* don't use the stats record, use the total time recorded */
216 	smin = stats_min(&sdat);
217 	smax = stats_max(&sdat);
218 
219 	printf("Min: %ld us\n", smin);
220 	printf("Max: %ld us\n", smax);
221 	printf("Avg: %.4f us\n", savg);
222 	printf("StdDev: %.4f us\n", stats_stddev(&sdat));
223 
224 	if (stats_hist(&shist, &sdat) ||
225 	    stats_container_save("sequential",
226 				 "Matrix Multiplication Sequential Execution Runtime Scatter Plot",
227 				 "Iteration", "Runtime (us)", &sdat, "points")
228 	    || stats_container_save("sequential_hist",
229 				    "Matrix Multiplicatoin Sequential Execution Runtime Histogram",
230 				    "Runtime (us)", "Samples", &shist, "steps")
231 	    ) {
232 		fprintf(stderr,
233 			"Warning: could not save sequential mults stats\n");
234 	}
235 
236 	pthread_barrier_init(&mult_start, NULL, numcpus + 1);
237 	set_priority(PRIO);
238 	curdat = &cdat;
239 	curdat->index = iterations - 1;
240 	online_cpu_id = -1;	/* Redispatch cpus */
241 	/* Create numcpus-1 concurrent threads */
242 	for (j = 0; j < numcpus; j++) {
243 		tids[j] = create_fifo_thread(concurrent_thread, matrices[j], PRIO);
244 		if (tids[j] == -1) {
245 			printf
246 			    ("Thread creation failed (max threads exceeded?)\n");
247 			exit(1);
248 		}
249 	}
250 
251 	/* run matrix mult operation concurrently */
252 	printf("\nRunning concurrent operations\n");
253 	pthread_barrier_wait(&mult_start);
254 	start = rt_gettime();
255 	join_threads();
256 	end = rt_gettime();
257 
258 	delta = (long)((end - start) / NS_PER_US);
259 
260 	cavg = delta / iterations;	/* don't use the stats record, use the total time recorded */
261 	cmin = stats_min(&cdat);
262 	cmax = stats_max(&cdat);
263 
264 	printf("Min: %ld us\n", cmin);
265 	printf("Max: %ld us\n", cmax);
266 	printf("Avg: %.4f us\n", cavg);
267 	printf("StdDev: %.4f us\n", stats_stddev(&cdat));
268 
269 	if (stats_hist(&chist, &cdat) ||
270 	    stats_container_save("concurrent",
271 				 "Matrix Multiplication Concurrent Execution Runtime Scatter Plot",
272 				 "Iteration", "Runtime (us)", &cdat, "points")
273 	    || stats_container_save("concurrent_hist",
274 				    "Matrix Multiplication Concurrent Execution Runtime Histogram",
275 				    "Iteration", "Runtime (us)", &chist,
276 				    "steps")
277 	    ) {
278 		fprintf(stderr,
279 			"Warning: could not save concurrent mults stats\n");
280 	}
281 
282 	printf("\nConcurrent Multipliers:\n");
283 	printf("Min: %.4f\n", (float)smin / cmin);
284 	printf("Max: %.4f\n", (float)smax / cmax);
285 	printf("Avg: %.4f\n", (float)savg / cavg);
286 
287 	ret = 1;
288 	if (savg > (cavg * criteria))
289 		ret = 0;
290 	printf
291 	    ("\nCriteria: %.2f * average concurrent time < average sequential time\n",
292 	     criteria);
293 	printf("Result: %s\n", ret ? "FAIL" : "PASS");
294 
295 	for (i = 0; i < numcpus; i++)
296 		free(matrices[i]);
297 
298 	return ret;
299 }
300 
main(int argc,char * argv[])301 int main(int argc, char *argv[])
302 {
303 	setup();
304 	pass_criteria = PASS_CRITERIA;
305 	rt_init("l:i:h", parse_args, argc, argv);
306 	numcpus = sysconf(_SC_NPROCESSORS_ONLN);
307 	/* the minimum avg concurrent multiplier to pass */
308 	criteria = pass_criteria * numcpus;
309 	int new_iterations, ret;
310 
311 	if (iterations <= 0) {
312 		fprintf(stderr, "iterations must be greater than zero\n");
313 		exit(1);
314 	}
315 
316 	printf("\n---------------------------------------\n");
317 	printf("Matrix Multiplication (SMP Performance)\n");
318 	printf("---------------------------------------\n\n");
319 
320 	/* Line below rounds up iterations to a multiple of numcpus.
321 	 * Without this, having iterations not a mutiple of numcpus causes
322 	 * stats to segfault (overflow stats array).
323 	 */
324 	new_iterations = (int)((iterations + numcpus - 1) / numcpus) * numcpus;
325 	if (new_iterations != iterations)
326 		printf
327 		    ("Rounding up iterations value to nearest multiple of total online CPUs\n");
328 
329 	iterations = new_iterations;
330 	iterations_percpu = iterations / numcpus;
331 
332 	printf("Running %d iterations\n", iterations);
333 	printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE);
334 	printf("Calculations per iteration: %d\n", ops);
335 	printf("Number of CPUs: %u\n", numcpus);
336 
337 	set_priority(PRIO);
338 	ret = main_thread();
339 
340 	return ret;
341 }
342