1
2 /*
3 * This is the latest version of hackbench.c, that tests scheduler and
4 * unix-socket (or pipe) performance.
5 *
6 * Usage: hackbench [-pipe] <num groups> [process|thread] [loops]
7 *
8 * Build it with:
9 * gcc -g -Wall -O2 -o hackbench hackbench.c -lpthread
10 */
11 #if 0
12
13 Date: Fri, 04 Jan 2008 14:06:26 +0800
14 From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
15 To: LKML <linux-kernel@vger.kernel.org>
16 Subject: Improve hackbench
17 Cc: Ingo Molnar <mingo@elte.hu>, Arjan van de Ven <arjan@infradead.org>
18
19 hackbench tests the Linux scheduler. The original program is at
20 http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
21 Based on this multi-process version, a nice person created a multi-thread
22 version. Pls. see
23 http://www.bullopensource.org/posix/pi-futex/hackbench_pth.c
24
25 When I integrated them into my automation testing system, I found
26 a couple of issues and did some improvements.
27
28 1) Merge hackbench: I integrated hackbench_pth.c into hackbench and added a
29 new parameter which can be used to choose process mode or thread mode. The
30 default mode is process.
31
32 2) It runs too fast and ends in a couple of seconds. Sometimes it's too hard to debug
33 the issues. On my ia64 Montecito machines, the result looks weird when comparing
34 process mode and thread mode.
35 I want a stable result and hope the testing could run for a stable longer time, so I
36 might use performance tools to debug issues.
37 I added another new parameter,`loops`, which can be used to change variable loops,
38 so more messages will be passed from writers to receivers. Parameter 'loops' is equal to
39 100 by default.
40
41 For example on my 8-core x86_64:
42 [ymzhang@lkp-st01-x8664 hackbench]$ uname -a
43 Linux lkp-st01-x8664 2.6.24-rc6 #1 SMP Fri Dec 21 08:32:31 CST 2007 x86_64 x86_64 x86_64 GNU/Linux
44 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench
45 Usage: hackbench [-pipe] <num groups> [process|thread] [loops]
46 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 process 1000
47 Time: 151.533
48 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 thread 1000
49 Time: 153.666
50
51
With the same new parameters, I captured the SLUB issue discussed on LKML recently.
53
54 3) hackbench_pth.c will fail on ia64 machine because pthread_attr_setstacksize always
55 fails if the stack size is less than 196*1024. I moved this statement within a __ia64__ check.
56
57
58 This new program could be compiled with command line:
59 #gcc -g -Wall -o hackbench hackbench.c -lpthread
60
61
Thanks to Ingo for his great comments!
63
64 -yanmin
65
66 ---
67
68 * Nathan Lynch <ntl@pobox.com> wrote:
69
70 > Here's a fixlet for the hackbench program found at
71 >
72 > http://people.redhat.com/mingo/cfs-scheduler/tools/hackbench.c
73 >
74 > When redirecting hackbench output I am seeing multiple copies of the
75 > "Running with %d*40 (== %d) tasks" line. Need to flush the buffered
76 > output before forking.
77
78 #endif
79
80 /* Test groups of 20 processes spraying to 20 receivers */
81 #include <pthread.h>
82 #include <stdio.h>
83 #include <stdlib.h>
84 #include <string.h>
85 #include <errno.h>
86 #include <unistd.h>
87 #include <sys/types.h>
88 #include <sys/socket.h>
89 #include <sys/wait.h>
90 #include <sys/time.h>
91 #include <sys/poll.h>
92 #include <limits.h>
93
/* Size in bytes of every message a sender writes and a receiver reads. */
#define DATASIZE 100

/* How many messages each sender writes to each of its output descriptors. */
static unsigned int loops = 100;

/*
 * Worker model: zero selects thread mode, any other value selects
 * process mode (the default).
 */
static unsigned int process_mode = 1;

/* Non-zero makes fdpair() create pipes instead of UNIX socket pairs. */
static int use_pipes;
102
/*
 * Per-group state shared by all senders of one group.
 *
 * The structure is allocated with room for num_fds trailing descriptors,
 * so it must always be created with
 * malloc(sizeof(struct sender_context) + num_fds * sizeof(int)).
 */
struct sender_context {
	unsigned int num_fds;	/* number of valid entries in out_fds[] */
	int ready_out;		/* descriptor on which readiness is announced */
	int wakefd;		/* descriptor polled for the start signal */
	int out_fds[];		/* one write descriptor per receiver (C99 flexible array member) */
};
109
/*
 * State handed to a single receiver worker.
 */
struct receiver_context {
	/* Total number of DATASIZE-byte messages this receiver must read. */
	unsigned int num_packets;
	/*
	 * Both ends of the channel: in_fds[0] is read from, in_fds[1] is
	 * the other end, which the receiver closes in process mode.
	 */
	int in_fds[2];
	/* Descriptor on which readiness is announced. */
	int ready_out;
	/* Descriptor polled for the start signal. */
	int wakefd;
};
116
117
/*
 * Report a fatal error and terminate the program.
 *
 * Prints msg on stderr together with the strerror() text for the current
 * value of errno, then exits with status 1.  Does not return.
 */
static void barf(const char *msg)
{
	const char *reason = strerror(errno);

	fprintf(stderr, "%s (error: %s)\n", msg, reason);
	exit(1);
}
123
/* Print the command-line synopsis on stdout and exit with status 1. */
static void print_usage_exit()
{
	fputs("Usage: hackbench [-pipe] <num groups> [process|thread] [loops]\n",
	      stdout);
	exit(1);
}
129
fdpair(int fds[2])130 static void fdpair(int fds[2])
131 {
132 if (use_pipes) {
133 if (pipe(fds) == 0)
134 return;
135 } else {
136 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
137 return;
138 }
139 barf("Creating fdpair");
140 }
141
/*
 * Block until we're ready to go.
 *
 * Announces readiness by writing one byte to ready_out, then waits with no
 * timeout until wakefd becomes readable.  The wake byte is only polled for,
 * not consumed, by this function.  Any failure is fatal via barf().
 */
static void ready(int ready_out, int wakefd)
{
	/*
	 * Only the arrival of the byte matters, but give it a defined value
	 * so we never hand an uninitialised buffer to write().
	 */
	char dummy = '*';
	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };

	/* Tell them we're ready. */
	if (write(ready_out, &dummy, 1) != 1)
		barf("CLIENT: ready write");

	/* Wait for "GO" signal */
	if (poll(&pollfd, 1, -1) != 1)
		barf("poll");
}
156
157 /* Sender sprays loops messages down each file descriptor */
sender(struct sender_context * ctx)158 static void *sender(struct sender_context *ctx)
159 {
160 char data[DATASIZE];
161 unsigned int i, j;
162
163 ready(ctx->ready_out, ctx->wakefd);
164
165 /* Now pump to every receiver. */
166 for (i = 0; i < loops; i++) {
167 for (j = 0; j < ctx->num_fds; j++) {
168 int ret, done = 0;
169
170 again:
171 ret = write(ctx->out_fds[j], data + done, sizeof(data)-done);
172 if (ret < 0)
173 barf("SENDER: write");
174 done += ret;
175 if (done < sizeof(data))
176 goto again;
177 }
178 }
179
180 return NULL;
181 }
182
183
184 /* One receiver per fd */
receiver(struct receiver_context * ctx)185 static void *receiver(struct receiver_context* ctx)
186 {
187 unsigned int i;
188
189 if (process_mode)
190 close(ctx->in_fds[1]);
191
192 /* Wait for start... */
193 ready(ctx->ready_out, ctx->wakefd);
194
195 /* Receive them all */
196 for (i = 0; i < ctx->num_packets; i++) {
197 char data[DATASIZE];
198 int ret, done = 0;
199
200 again:
201 ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
202 if (ret < 0)
203 barf("SERVER: read");
204 done += ret;
205 if (done < DATASIZE)
206 goto again;
207 }
208
209 return NULL;
210 }
211
create_worker(void * ctx,void * (* func)(void *))212 pthread_t create_worker(void *ctx, void *(*func)(void *))
213 {
214 pthread_attr_t attr;
215 pthread_t childid;
216 int err;
217
218 if (process_mode) {
219 /* process mode */
220 /* Fork the receiver. */
221 switch (fork()) {
222 case -1: barf("fork()");
223 case 0:
224 (*func) (ctx);
225 exit(0);
226 }
227
228 return (pthread_t) 0;
229 }
230
231 if (pthread_attr_init(&attr) != 0)
232 barf("pthread_attr_init:");
233
234 #ifndef __ia64__
235 if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
236 barf("pthread_attr_setstacksize");
237 #endif
238
239 if ((err=pthread_create(&childid, &attr, func, ctx)) != 0) {
240 fprintf(stderr, "pthread_create failed: %s (%d)\n", strerror(err), err);
241 exit(-1);
242 }
243 return (childid);
244 }
245
reap_worker(pthread_t id)246 void reap_worker(pthread_t id)
247 {
248 int status;
249
250 if (process_mode) {
251 /* process mode */
252 wait(&status);
253 if (!WIFEXITED(status))
254 exit(1);
255 } else {
256 void *status;
257
258 pthread_join(id, &status);
259 }
260 }
261
262 /* One group of senders and receivers */
group(pthread_t * pth,unsigned int num_fds,int ready_out,int wakefd)263 static unsigned int group(pthread_t *pth,
264 unsigned int num_fds,
265 int ready_out,
266 int wakefd)
267 {
268 unsigned int i;
269 struct sender_context* snd_ctx = malloc (sizeof(struct sender_context)
270 +num_fds*sizeof(int));
271
272 for (i = 0; i < num_fds; i++) {
273 int fds[2];
274 struct receiver_context* ctx = malloc (sizeof(*ctx));
275
276 if (!ctx)
277 barf("malloc()");
278
279
280 /* Create the pipe between client and server */
281 fdpair(fds);
282
283 ctx->num_packets = num_fds*loops;
284 ctx->in_fds[0] = fds[0];
285 ctx->in_fds[1] = fds[1];
286 ctx->ready_out = ready_out;
287 ctx->wakefd = wakefd;
288
289 pth[i] = create_worker(ctx, (void *)(void *)receiver);
290
291 snd_ctx->out_fds[i] = fds[1];
292 if (process_mode)
293 close(fds[0]);
294 }
295
296 /* Now we have all the fds, fork the senders */
297 for (i = 0; i < num_fds; i++) {
298 snd_ctx->ready_out = ready_out;
299 snd_ctx->wakefd = wakefd;
300 snd_ctx->num_fds = num_fds;
301
302 pth[num_fds+i] = create_worker(snd_ctx, (void *)(void *)sender);
303 }
304
305 /* Close the fds we have left */
306 if (process_mode)
307 for (i = 0; i < num_fds; i++)
308 close(snd_ctx->out_fds[i]);
309
310 /* Return number of children to reap */
311 return num_fds * 2;
312 }
313
main(int argc,char * argv[])314 int main(int argc, char *argv[])
315 {
316 unsigned int i, num_groups = 10, total_children;
317 struct timeval start, stop, diff;
318 unsigned int num_fds = 20;
319 int readyfds[2], wakefds[2];
320 char dummy;
321 pthread_t *pth_tab;
322
323 if (argv[1] && strcmp(argv[1], "-pipe") == 0) {
324 use_pipes = 1;
325 argc--;
326 argv++;
327 }
328
329 if (argc >= 2 && (num_groups = atoi(argv[1])) == 0)
330 print_usage_exit();
331
332 printf("Running with %d*40 (== %d) tasks.\n",
333 num_groups, num_groups*40);
334
335 fflush(NULL);
336
337 if (argc > 2) {
338 if ( !strcmp(argv[2], "process") )
339 process_mode = 1;
340 else if ( !strcmp(argv[2], "thread") )
341 process_mode = 0;
342 else
343 print_usage_exit();
344 }
345
346 if (argc > 3)
347 loops = atoi(argv[3]);
348
349 pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
350
351 if (!pth_tab)
352 barf("main:malloc()");
353
354 fdpair(readyfds);
355 fdpair(wakefds);
356
357 total_children = 0;
358 for (i = 0; i < num_groups; i++)
359 total_children += group(pth_tab+total_children, num_fds, readyfds[1], wakefds[0]);
360
361 /* Wait for everyone to be ready */
362 for (i = 0; i < total_children; i++)
363 if (read(readyfds[0], &dummy, 1) != 1)
364 barf("Reading for readyfds");
365
366 gettimeofday(&start, NULL);
367
368 /* Kick them off */
369 if (write(wakefds[1], &dummy, 1) != 1)
370 barf("Writing to start them");
371
372 /* Reap them all */
373 for (i = 0; i < total_children; i++)
374 reap_worker(pth_tab[i]);
375
376 gettimeofday(&stop, NULL);
377
378 /* Print time... */
379 timersub(&stop, &start, &diff);
380 printf("Time: %lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
381 exit(0);
382 }
383
384
385