• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *
3  *   Copyright © International Business Machines  Corp., 2007, 2008
4  *
5  *   This program is free software;  you can redistribute it and/or modify
6  *   it under the terms of the GNU General Public License as published by
7  *   the Free Software Foundation; either version 2 of the License, or
8  *   (at your option) any later version.
9  *
10  *   This program is distributed in the hope that it will be useful,
11  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13  *   the GNU General Public License for more details.
14  *
15  *   You should have received a copy of the GNU General Public License
16  *   along with this program;  if not, write to the Free Software
17  *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  *
19  * NAME
20  *      matrix_mult.c
21  *
22  * DESCRIPTION
23  *      Compare running sequential matrix multiplication routines
24  *      to running them in parallel to judge mutliprocessor
25  *      performance
26  *
27  * USAGE:
28  *      Use run_auto.sh script in current directory to build and run test.
29  *
30  * AUTHOR
31  *      Darren Hart <dvhltc@us.ibm.com>
32  *
33  * HISTORY
34  *      2007-Mar-09:  Initial version by Darren Hart <dvhltc@us.ibm.com>
35  *      2008-Feb-26:  Closely emulate jvm Dinakar Guniguntala <dino@in.ibm.com>
36  *
37  *****************************************************************************/
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <math.h>
42 #include <librttest.h>
43 #include <libstats.h>
44 
45 #define MAX_CPUS	8192
46 #define PRIO		43
47 #define MATRIX_SIZE	100
48 #define DEF_OPS		8	/* the higher the number, the more CPU intensive */
49 					/* (and therefore SMP performance goes up) */
50 #define PASS_CRITERIA	0.75	/* Avg concurrent time * pass criteria < avg seq time - */
51 					/* for every addition of a cpu */
52 #define ITERATIONS	128
53 #define HIST_BUCKETS	100
54 
55 #define THREAD_WAIT	1
56 #define THREAD_WORK	2
57 #define THREAD_DONE	3
58 
59 #define THREAD_SLEEP	1 * NS_PER_US
60 
61 static int ops = DEF_OPS;
62 static int numcpus;
63 static float criteria;
64 static int *tids;
65 static int online_cpu_id = -1;
66 static int iterations = ITERATIONS;
67 static int iterations_percpu;
68 
69 stats_container_t sdat, cdat, *curdat;
70 stats_container_t shist, chist;
71 static pthread_barrier_t mult_start;
72 static pthread_mutex_t mutex_cpu;
73 
usage(void)74 void usage(void)
75 {
76 	rt_help();
77 	printf("matrix_mult specific options:\n");
78 	printf
79 	    ("  -l#	   #: number of multiplications per iteration (load)\n");
80 	printf("  -i#	   #: number of iterations\n");
81 }
82 
parse_args(int c,char * v)83 int parse_args(int c, char *v)
84 {
85 	int handled = 1;
86 	switch (c) {
87 	case 'i':
88 		iterations = atoi(v);
89 		break;
90 	case 'l':
91 		ops = atoi(v);
92 		break;
93 	case 'h':
94 		usage();
95 		exit(0);
96 	default:
97 		handled = 0;
98 		break;
99 	}
100 	return handled;
101 }
102 
matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],double B[MATRIX_SIZE][MATRIX_SIZE])103 void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE],
104 		 double B[MATRIX_SIZE][MATRIX_SIZE])
105 {
106 	int i, j;
107 	for (i = 0; i < MATRIX_SIZE; i++) {
108 		for (j = 0; j < MATRIX_SIZE; j++) {
109 			A[i][j] = (double)(i * j);
110 			B[i][j] = (double)((i * j) % 10);
111 		}
112 	}
113 }
114 
matrix_mult(int m_size)115 void matrix_mult(int m_size)
116 {
117 	double A[m_size][m_size];
118 	double B[m_size][m_size];
119 	double C[m_size][m_size];
120 	int i, j, k;
121 
122 	matrix_init(A, B);
123 	for (i = 0; i < m_size; i++) {
124 		int i_m = m_size - i;
125 		for (j = 0; j < m_size; j++) {
126 			double sum = A[i_m][j] * B[j][i];
127 			for (k = 0; k < m_size; k++)
128 				sum += A[i_m][k] * B[k][j];
129 			C[i][j] = sum;
130 		}
131 	}
132 }
133 
matrix_mult_record(int m_size,int index)134 void matrix_mult_record(int m_size, int index)
135 {
136 	nsec_t start, end, delta;
137 	int i;
138 
139 	start = rt_gettime();
140 	for (i = 0; i < ops; i++)
141 		matrix_mult(MATRIX_SIZE);
142 	end = rt_gettime();
143 	delta = (long)((end - start) / NS_PER_US);
144 	curdat->records[index].x = index;
145 	curdat->records[index].y = delta;
146 }
147 
set_affinity(void)148 int set_affinity(void)
149 {
150 	cpu_set_t mask;
151 	int cpuid;
152 
153 	pthread_mutex_lock(&mutex_cpu);
154 	do {
155 		++online_cpu_id;
156 		CPU_ZERO(&mask);
157 		CPU_SET(online_cpu_id, &mask);
158 
159 		if (!sched_setaffinity(0, sizeof(mask), &mask)) {
160 			cpuid = online_cpu_id;	/* Save this value before unlocking mutex */
161 			pthread_mutex_unlock(&mutex_cpu);
162 			return cpuid;
163 		}
164 	} while (online_cpu_id < MAX_CPUS);
165 	pthread_mutex_unlock(&mutex_cpu);
166 	return -1;
167 }
168 
concurrent_thread(void * thread)169 void *concurrent_thread(void *thread)
170 {
171 	struct thread *t = (struct thread *)thread;
172 	int thread_id = (intptr_t) t->id;
173 	int cpuid;
174 	int i;
175 	int index;
176 
177 	cpuid = set_affinity();
178 	if (cpuid == -1) {
179 		fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id);
180 		exit(1);
181 	}
182 
183 	index = iterations_percpu * thread_id;	/* To avoid stats overlapping */
184 	pthread_barrier_wait(&mult_start);
185 	for (i = 0; i < iterations_percpu; i++)
186 		matrix_mult_record(MATRIX_SIZE, index++);
187 
188 	return NULL;
189 }
190 
main_thread(void)191 void main_thread(void)
192 {
193 	int ret, i, j;
194 	nsec_t start, end;
195 	long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
196 	float savg, cavg;
197 	int cpuid;
198 
199 	if (stats_container_init(&sdat, iterations) ||
200 	    stats_container_init(&shist, HIST_BUCKETS) ||
201 	    stats_container_init(&cdat, iterations) ||
202 	    stats_container_init(&chist, HIST_BUCKETS)
203 	    ) {
204 		fprintf(stderr, "Cannot init stats container\n");
205 		exit(1);
206 	}
207 
208 	tids = malloc(sizeof(int) * numcpus);
209 	if (!tids) {
210 		perror("malloc");
211 		exit(1);
212 	}
213 	memset(tids, 0, numcpus);
214 
215 	cpuid = set_affinity();
216 	if (cpuid == -1) {
217 		fprintf(stderr, "Main thread: Can't set affinity.\n");
218 		exit(1);
219 	}
220 
221 	/* run matrix mult operation sequentially */
222 	curdat = &sdat;
223 	curdat->index = iterations - 1;
224 	printf("\nRunning sequential operations\n");
225 	start = rt_gettime();
226 	for (i = 0; i < iterations; i++)
227 		matrix_mult_record(MATRIX_SIZE, i);
228 	end = rt_gettime();
229 	delta = (long)((end - start) / NS_PER_US);
230 
231 	savg = delta / iterations;	/* don't use the stats record, use the total time recorded */
232 	smin = stats_min(&sdat);
233 	smax = stats_max(&sdat);
234 
235 	printf("Min: %ld us\n", smin);
236 	printf("Max: %ld us\n", smax);
237 	printf("Avg: %.4f us\n", savg);
238 	printf("StdDev: %.4f us\n", stats_stddev(&sdat));
239 
240 	if (stats_hist(&shist, &sdat) ||
241 	    stats_container_save("sequential",
242 				 "Matrix Multiplication Sequential Execution Runtime Scatter Plot",
243 				 "Iteration", "Runtime (us)", &sdat, "points")
244 	    || stats_container_save("sequential_hist",
245 				    "Matrix Multiplicatoin Sequential Execution Runtime Histogram",
246 				    "Runtime (us)", "Samples", &shist, "steps")
247 	    ) {
248 		fprintf(stderr,
249 			"Warning: could not save sequential mults stats\n");
250 	}
251 
252 	pthread_barrier_init(&mult_start, NULL, numcpus + 1);
253 	set_priority(PRIO);
254 	curdat = &cdat;
255 	curdat->index = iterations - 1;
256 	online_cpu_id = -1;	/* Redispatch cpus */
257 	/* Create numcpus-1 concurrent threads */
258 	for (j = 0; j < numcpus; j++) {
259 		tids[j] = create_fifo_thread(concurrent_thread, NULL, PRIO);
260 		if (tids[j] == -1) {
261 			printf
262 			    ("Thread creation failed (max threads exceeded?)\n");
263 			exit(1);
264 		}
265 	}
266 
267 	/* run matrix mult operation concurrently */
268 	printf("\nRunning concurrent operations\n");
269 	pthread_barrier_wait(&mult_start);
270 	start = rt_gettime();
271 	join_threads();
272 	end = rt_gettime();
273 
274 	delta = (long)((end - start) / NS_PER_US);
275 
276 	cavg = delta / iterations;	/* don't use the stats record, use the total time recorded */
277 	cmin = stats_min(&cdat);
278 	cmax = stats_max(&cdat);
279 
280 	printf("Min: %ld us\n", cmin);
281 	printf("Max: %ld us\n", cmax);
282 	printf("Avg: %.4f us\n", cavg);
283 	printf("StdDev: %.4f us\n", stats_stddev(&cdat));
284 
285 	if (stats_hist(&chist, &cdat) ||
286 	    stats_container_save("concurrent",
287 				 "Matrix Multiplication Concurrent Execution Runtime Scatter Plot",
288 				 "Iteration", "Runtime (us)", &cdat, "points")
289 	    || stats_container_save("concurrent_hist",
290 				    "Matrix Multiplication Concurrent Execution Runtime Histogram",
291 				    "Iteration", "Runtime (us)", &chist,
292 				    "steps")
293 	    ) {
294 		fprintf(stderr,
295 			"Warning: could not save concurrent mults stats\n");
296 	}
297 
298 	printf("\nConcurrent Multipliers:\n");
299 	printf("Min: %.4f\n", (float)smin / cmin);
300 	printf("Max: %.4f\n", (float)smax / cmax);
301 	printf("Avg: %.4f\n", (float)savg / cavg);
302 
303 	ret = 1;
304 	if (savg > (cavg * criteria))
305 		ret = 0;
306 	printf
307 	    ("\nCriteria: %.2f * average concurrent time < average sequential time\n",
308 	     criteria);
309 	printf("Result: %s\n", ret ? "FAIL" : "PASS");
310 
311 	return;
312 }
313 
main(int argc,char * argv[])314 int main(int argc, char *argv[])
315 {
316 	setup();
317 	pass_criteria = PASS_CRITERIA;
318 	rt_init("l:i:h", parse_args, argc, argv);
319 	numcpus = sysconf(_SC_NPROCESSORS_ONLN);
320 	/* the minimum avg concurrent multiplier to pass */
321 	criteria = pass_criteria * numcpus;
322 	int new_iterations;
323 
324 	if (iterations <= 0) {
325 		fprintf(stderr, "iterations must be greater than zero\n");
326 		exit(1);
327 	}
328 
329 	printf("\n---------------------------------------\n");
330 	printf("Matrix Multiplication (SMP Performance)\n");
331 	printf("---------------------------------------\n\n");
332 
333 	/* Line below rounds up iterations to a multiple of numcpus.
334 	 * Without this, having iterations not a mutiple of numcpus causes
335 	 * stats to segfault (overflow stats array).
336 	 */
337 	new_iterations = (int)((iterations + numcpus - 1) / numcpus) * numcpus;
338 	if (new_iterations != iterations)
339 		printf
340 		    ("Rounding up iterations value to nearest multiple of total online CPUs\n");
341 
342 	iterations = new_iterations;
343 	iterations_percpu = iterations / numcpus;
344 
345 	printf("Running %d iterations\n", iterations);
346 	printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE);
347 	printf("Calculations per iteration: %d\n", ops);
348 	printf("Number of CPUs: %u\n", numcpus);
349 
350 	set_priority(PRIO);
351 	main_thread();
352 
353 	return 0;
354 }
355