1 /*
2 * The main pounder process controller and scheduler program.
3 * Author: Darrick Wong <djwong@us.ibm.com>
4 */
5
6 /*
7 * Copyright (C) 2003-2006 IBM
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 * 02111-1307, USA.
23 */
24
25 #include <errno.h>
26 #include <signal.h>
27 #include <sys/wait.h>
28 #include <unistd.h>
29 #include <fcntl.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #include <sys/time.h>
35 #include <time.h>
36 #include <stdio.h>
37 #include <dirent.h>
38 #include <sys/stat.h>
39
40 #include "proclist.h"
41 #include "debug.h"
42
// Children started as tests ('T' entries); wait_for_pids() reaps these and
// kill_tests() signals them.
struct proclist_t wait_ons = { NULL };
// Children started as daemons ('D' entries); kill_daemons() signals these
// and wait_for_daemons() reaps them.
struct proclist_t daemons = { NULL };

// Nonzero when this instance was started with "--leader" as argv[2].
static int is_leader = 0;
// Path of the PID file; set by record_pid() and unlinked on exit by the leader.
static char *pidfile = "";

// Forward declarations for the helpers defined later in this file.
static inline int is_executable(const char *fname);
static inline int is_directory(const char *fname);
static inline int test_filter(const struct dirent *p);
static inline int test_sort(const struct dirent **a, const struct dirent **b);
static int wait_for_pids(void);
static void wait_for_daemons(void);
static void note_process(pid_t pid, char *name);
static void note_daemon(pid_t pid, char *name);
static void kill_tests(void);
static void kill_daemons(void);
static int process_dir(const char *fname);
static pid_t spawn_test(char *fname);
static void note_child(pid_t pid, char *fname, char type);
static int child_finished(const char *name, int stat);
// argv[0] of this process; process_dir() re-executes it for subdirectories.
static char *progname;

// Size of the path buffers used when building log and test path names.
#define TEST_PATH_LEN 512
// Microseconds the parent sleeps (via usleep) after forking a test.
#define TEST_FORK_WAIT 100
68
69 /**
70 * Kill everything upon ^C.
71 */
jump_out(int signum)72 static void jump_out(int signum)
73 {
74 pounder_fprintf(stdout, "Control-C received; aborting!\n");
75 //unlink("pounder_pgrp");
76 kill_tests();
77 kill_daemons();
78 if (is_leader) {
79 unlink(pidfile);
80 }
81 exit(0);
82 }
83
84 /**
85 * Kills tests launched from within.
86 */
kill_tests(void)87 static void kill_tests(void)
88 {
89 struct proclist_item_t *curr;
90
91 curr = wait_ons.head;
92 while (curr != NULL) {
93 kill(-curr->pid, SIGTERM);
94 curr = curr->next;
95 }
96 }
97
98 /**
99 * Kills daemons launched from within.
100 */
kill_daemons(void)101 static void kill_daemons(void)
102 {
103 struct proclist_item_t *curr;
104
105 curr = daemons.head;
106 while (curr != NULL) {
107 kill(-curr->pid, SIGTERM);
108 curr = curr->next;
109 }
110 }
111
112 /**
113 * Record the pounder leader's PID in a file.
114 */
record_pid(void)115 static void record_pid(void)
116 {
117 FILE *fp;
118
119 pidfile = getenv("POUNDER_PIDFILE");
120 if (pidfile == NULL) {
121 pidfile = "pounder.pid";
122 }
123
124 fp = fopen(pidfile, "w");
125 if (fp == NULL) {
126 perror(pidfile);
127 }
128 fprintf(fp, "%d", getpid());
129 fclose(fp);
130 }
131
132 /**
133 * Main program. Returns 1 if all programs run successfully, 0 if
134 * something failed and -1 if there was an error running programs.
135 */
main(int argc,char * argv[])136 int main(int argc, char *argv[])
137 {
138 int retcode;
139 struct sigaction zig;
140 pid_t pid;
141 char *c;
142
143 /* Check parameters */
144 if (argc < 2) {
145 fprintf(stderr, "Usage: %s test_prog\n", argv[0]);
146 return 1;
147 }
148
149 if (argc > 2 && strcmp(argv[2], "--leader") == 0) {
150 pounder_fprintf(stdout,
151 "Logging this test output to %s/POUNDERLOG.\n",
152 getenv("POUNDER_LOGDIR"));
153 is_leader = 1;
154 record_pid();
155 }
156
157 progname = argv[0];
158
159 /* Set up signals */
160 memset(&zig, 0x00, sizeof(zig));
161 zig.sa_handler = jump_out;
162 sigaction(SIGHUP, &zig, NULL);
163 sigaction(SIGINT, &zig, NULL);
164 sigaction(SIGTERM, &zig, NULL);
165
166 if (is_directory(argv[1])) {
167 retcode = process_dir(argv[1]);
168 } else {
169 if (is_executable(argv[1])) {
170 c = rindex(argv[1], '/');
171 c++;
172
173 // Start the test
174 pid = spawn_test(argv[1]);
175 if (pid < 0) {
176 perror("fork");
177 retcode = -1;
178 goto out;
179 }
180 // Track the test
181 note_process(pid, argv[1]);
182 if (wait_for_pids() == 0) {
183 retcode = 1;
184 } else {
185 retcode = 0;
186 }
187 } else {
188 pounder_fprintf(stderr,
189 "%s: Not a directory or a test.\n",
190 argv[1]);
191 retcode = -1;
192 }
193 }
194
195 out:
196 kill_daemons();
197 wait_for_daemons();
198 if (is_leader) {
199 if (retcode == 0) {
200 pounder_fprintf(stdout, "%s: %s.\n", argv[1], pass_msg);
201 } else if (retcode < 0 || retcode == 255) {
202 pounder_fprintf(stdout, "%s: %s with code %d.\n",
203 argv[1], abort_msg, retcode);
204 } else {
205 pounder_fprintf(stdout, "%s: %s with code %d.\n",
206 argv[1], fail_msg, retcode);
207 }
208 unlink(pidfile);
209 }
210 exit(retcode);
211 }
212
/**
 * Helper function to determine if a file is executable by this process.
 *
 * The decision is made from the file's mode bits and the effective user
 * and group IDs; supplementary groups are not consulted.
 *
 * @param fname  path of the file to examine (symlinks are followed)
 * @return 1 if executable, 0 if not, -1 if the file could not be stat'ed
 */
static inline int is_executable(const char *fname)
{
	struct stat tmp;
	mode_t wanted;

	if (stat(fname, &tmp) < 0) {
		return -1;
	}

	if (geteuid() == 0) {
		// Even the superuser can only exec a file that has at least
		// one execute bit set.
		wanted = S_IXUSR | S_IXGRP | S_IXOTH;
	} else if (geteuid() == tmp.st_uid) {
		wanted = S_IXUSR;
	} else if (getegid() == tmp.st_gid) {
		wanted = S_IXGRP;
	} else {
		wanted = S_IXOTH;
	}

	// Collapse the masked mode bits to the documented 0/1 answer.
	return (tmp.st_mode & wanted) != 0;
}
235
/**
 * Helper function to determine if a path names a directory.
 *
 * @param fname  path to examine (symlinks are followed)
 * @return 1 if it is a directory; 0 if it is not or if it could not be
 *         stat'ed
 */
static inline int is_directory(const char *fname)
{
	struct stat info;

	// A failed stat() is reported the same way as "not a directory".
	return stat(fname, &info) >= 0 && S_ISDIR(info.st_mode);
}
250
/**
 * scandir() filter: accepts names of the form 'T' or 'D' followed by two
 * decimal digits (for example "T00foo" or "D42").
 *
 * @param p  directory entry to examine
 * @return 1 if the entry's name fits the test name pattern, 0 otherwise
 */
static inline int test_filter(const struct dirent *p)
{
	const char *name = p->d_name;

	if (name[0] != 'T' && name[0] != 'D') {
		return 0;
	}

	// The <ctype.h> functions require a value representable as unsigned
	// char; a plain char holding a byte >= 0x80 may be negative. The
	// short-circuit also keeps us from reading past a short name's NUL.
	return isdigit((unsigned char)name[1])
	    && isdigit((unsigned char)name[2]);
}
259
/**
 * scandir() comparator for test entries.
 *
 * Names are compared from their second character on, so the leading type
 * letter does not influence the order. The operands are deliberately
 * swapped, which sorts the list in descending order; process_dir() then
 * walks the array from its last element back to its first.
 *
 * @return the strcmp() result of b's name against a's name
 */
static inline int test_sort(const struct dirent **a, const struct dirent **b)
{
	const char *key_a = (*a)->d_name + 1;
	const char *key_b = (*b)->d_name + 1;

	return strcmp(key_b, key_a);
}
269
270 /**
271 * Takes the wait() status integer and prints a log message.
272 * Returns 1 if there was a failure.
273 */
child_finished(const char * name,int stat)274 static int child_finished(const char *name, int stat)
275 {
276 int x;
277 // did we sig-exit?
278 if (WIFSIGNALED(stat)) {
279 pounder_fprintf(stdout, "%s: %s on signal %d.\n",
280 name, fail_msg, WTERMSIG(stat));
281 return 1;
282 } else {
283 x = WEXITSTATUS(stat);
284 if (x == 0) {
285 pounder_fprintf(stdout, "%s: %s.\n", name, pass_msg);
286 return 0;
287 } else if (x < 0 || x == 255) {
288 pounder_fprintf(stdout, "%s: %s with code %d.\n",
289 name, abort_msg, x);
290 return 1;
291 // FIXME: add test to blacklist
292 } else {
293 pounder_fprintf(stdout, "%s: %s with code %d.\n",
294 name, fail_msg, x);
295 return 1;
296 }
297 }
298 }
299
300 /**
301 * Wait for some number of PIDs. If any of them return nonzero, we
302 * assume that there was some kind of failure and return 0. Otherwise,
303 * we return 1 to indicate success.
304 */
wait_for_pids(void)305 static int wait_for_pids(void)
306 {
307 struct proclist_item_t *curr;
308 int i, stat, res, nprocs;
309 pid_t pid;
310
311 res = 1;
312
313 // figure out how many times we have to wait...
314 curr = wait_ons.head;
315 nprocs = 0;
316 while (curr != NULL) {
317 nprocs++;
318 curr = curr->next;
319 }
320
321 // now wait for children.
322 for (i = 0; i < nprocs;) {
323 pid = wait(&stat);
324
325 if (pid < 0) {
326 perror("wait");
327 return 0;
328 }
329 // go find the child
330 curr = wait_ons.head;
331 while (curr != NULL) {
332 if (curr->pid == pid) {
333 res =
334 (child_finished(curr->name, stat) ? 0 :
335 res);
336
337 // one less pid to wait for
338 i++;
339
340 // stop observing
341 remove_from_proclist(&wait_ons, curr);
342 free(curr->name);
343 free(curr);
344 break;
345 }
346 curr = curr->next;
347 }
348
349 curr = daemons.head;
350 while (curr != NULL) {
351 if (curr->pid == pid) {
352 child_finished(curr->name, stat);
353 remove_from_proclist(&daemons, curr);
354 free(curr->name);
355 free(curr);
356 break;
357 }
358 curr = curr->next;
359 }
360 }
361
362 return res;
363 }
364
365 /**
366 * Wait for daemons to finish. This function does NOT wait for wait_ons.
367 */
wait_for_daemons(void)368 static void wait_for_daemons(void)
369 {
370 struct proclist_item_t *curr;
371 int i, stat, res, nprocs;
372 pid_t pid;
373
374 res = 1;
375
376 // figure out how many times we have to wait...
377 curr = daemons.head;
378 nprocs = 0;
379 while (curr != NULL) {
380 nprocs++;
381 curr = curr->next;
382 }
383
384 // now wait for daemons.
385 for (i = 0; i < nprocs;) {
386 pid = wait(&stat);
387
388 if (pid < 0) {
389 perror("wait");
390 if (errno == ECHILD) {
391 return;
392 }
393 }
394
395 curr = daemons.head;
396 while (curr != NULL) {
397 if (curr->pid == pid) {
398 child_finished(curr->name, stat);
399 i++;
400 remove_from_proclist(&daemons, curr);
401 free(curr->name);
402 free(curr);
403 break;
404 }
405 curr = curr->next;
406 }
407 }
408 }
409
410 /**
411 * Creates a record of processes that we want to watch for.
412 */
note_process(pid_t pid,char * name)413 static void note_process(pid_t pid, char *name)
414 {
415 struct proclist_item_t *it;
416
417 it = calloc(1, sizeof(struct proclist_item_t));
418 if (it == NULL) {
419 perror("malloc proclist_item_t");
420 // XXX: Maybe we should just waitpid?
421 return;
422 }
423 it->pid = pid;
424 it->name = calloc(strlen(name) + 1, sizeof(char));
425 if (it->name == NULL) {
426 perror("malloc procitem name");
427 // XXX: Maybe we should just waitpid?
428 return;
429 }
430 strcpy(it->name, name);
431
432 add_to_proclist(&wait_ons, it);
433 }
434
435 /**
436 * Creates a record of daemons that should be killed on exit.
437 */
note_daemon(pid_t pid,char * name)438 static void note_daemon(pid_t pid, char *name)
439 {
440 struct proclist_item_t *it;
441
442 it = calloc(1, sizeof(struct proclist_item_t));
443 if (it == NULL) {
444 perror("malloc proclist_item_t");
445 // XXX: what do we do here?
446 return;
447 }
448 it->pid = pid;
449 it->name = calloc(strlen(name) + 1, sizeof(char));
450 if (it->name == NULL) {
451 perror("malloc procitem name");
452 // XXX: what do we do here?
453 return;
454 }
455 strcpy(it->name, name);
456
457 add_to_proclist(&daemons, it);
458 }
459
460 /**
461 * Starts a test, with the stdin/out/err fd's redirected to logs.
462 * The 'fname' parameter should be a relative path from $POUNDER_HOME.
463 */
spawn_test(char * fname)464 static pid_t spawn_test(char *fname)
465 {
466 pid_t pid;
467 int fd, tmp;
468 char buf[TEST_PATH_LEN], buf2[TEST_PATH_LEN];
469 char *last_slash;
470
471 pid = fork();
472 if (pid == 0) {
473 if (setpgrp() < 0) {
474 perror("setpgid");
475 }
476
477 pounder_fprintf(stdout, "%s: %s test.\n", fname, start_msg);
478
479 // reroute stdin
480 fd = open("/dev/null", O_RDWR);
481 if (fd < 0) {
482 perror("/dev/null");
483 exit(-1);
484 }
485 close(0);
486 tmp = dup2(fd, 0);
487 if (tmp < 0) {
488 perror("dup(/dev/null)");
489 exit(-1);
490 }
491 close(fd);
492
493 // generate log name-- '/' -> '-'.
494 snprintf(buf2, TEST_PATH_LEN, "%s|%s",
495 getenv("POUNDER_LOGDIR"), fname);
496
497 fd = strlen(buf2);
498 for (tmp = (index(buf2, '|') - buf2); tmp < fd; tmp++) {
499 if (buf2[tmp] == '/') {
500 buf2[tmp] = '-';
501 } else if (buf2[tmp] == '|') {
502 buf2[tmp] = '/';
503 }
504 }
505
506 // make it so that we have a way to get back to the
507 // original console.
508 tmp = dup2(1, 3);
509 if (tmp < 0) {
510 perror("dup(stdout, 3)");
511 exit(-1);
512 }
513 // reroute stdout/stderr
514 fd = open(buf2, O_RDWR | O_CREAT | O_TRUNC | O_SYNC,
515 S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
516 if (fd < 0) {
517 perror(buf2);
518 exit(-1);
519 }
520 close(1);
521 tmp = dup2(fd, 1);
522 if (tmp < 0) {
523 perror("dup(log, 1)");
524 exit(-1);
525 }
526 close(2);
527 tmp = dup2(fd, 2);
528 if (tmp < 0) {
529 perror("dup(log, 2)");
530 exit(-1);
531 }
532 close(fd);
533
534 // let us construct the absolute pathname of the test.
535 // first find the current directory
536 if (getcwd(buf, TEST_PATH_LEN) == NULL) {
537 perror("getcwd");
538 exit(-1);
539 }
540 // then splice cwd + fname
541 snprintf(buf2, TEST_PATH_LEN, "%s/%s", buf, fname);
542
543 // find the location of the last slash
544 last_slash = rindex(buf2, '/');
545
546 if (last_slash != NULL) {
547 // copy the filename part into a new buffer
548 snprintf(buf, TEST_PATH_LEN, "./%s", last_slash + 1);
549
550 // truncate at the last slash
551 *last_slash = 0;
552
553 // and chdir
554 if (chdir(buf2) != 0) {
555 perror(buf2);
556 exit(-1);
557 }
558 // reassign variables
559 fname = buf;
560 }
561 // spawn the process
562 execlp(fname, fname, NULL);
563
564 // If we get here, we can't run the test.
565 perror(fname);
566 exit(-1);
567 }
568
569 tmp = errno;
570 /* yield for a short while, so that the test has
571 * a little bit of time to run.
572 */
573 usleep(TEST_FORK_WAIT);
574 errno = tmp;
575
576 return pid;
577 }
578
/**
 * Files a freshly started child on the list that matches its type.
 *
 * @param pid    process ID of the child
 * @param fname  label for the child
 * @param type   'T' to track it as a test, 'D' to track it as a daemon;
 *               any other value is logged and the child is not tracked
 */
static void note_child(pid_t pid, char *fname, char type)
{
	switch (type) {
	case 'T':
		note_process(pid, fname);
		break;
	case 'D':
		note_daemon(pid, fname);
		break;
	default:
		pounder_fprintf(stdout,
				"Don't know what to do with child `%s' of type %c.\n",
				fname, type);
		break;
	}
}
595
596 /**
597 * Process a directory--for each entry in a directory, execute files or spawn
598 * a new copy of ourself on the new directory. Process execution is subject to
599 * these rules:
600 *
601 * - Test files that start with the same number '00foo' and '00bar' are allowed
602 * to run simultaneously.
603 * - Test files are run in order of number and then name.
604 *
605 * If a the fork fails, bit 1 of the return code is set. If a
606 * program runs but fails, bit 2 is set.
607 */
process_dir(const char * fname)608 static int process_dir(const char *fname)
609 {
610 struct dirent **namelist;
611 int i, result = 0;
612 char buf[TEST_PATH_LEN];
613 int curr_level_num = -1;
614 int test_level_num;
615 pid_t pid;
616 int children_ok = 1;
617
618 pounder_fprintf(stdout, "%s: Entering directory.\n", fname);
619
620 i = scandir(fname, &namelist, test_filter,
621 (int (*)(const void *, const void *))test_sort);
622 if (i < 0) {
623 perror(fname);
624 return -1;
625 }
626
627 while (i--) {
628 /* determine level number */
629 test_level_num = ((namelist[i]->d_name[1] - '0') * 10)
630 + (namelist[i]->d_name[2] - '0');
631
632 if (curr_level_num == -1) {
633 curr_level_num = test_level_num;
634 }
635
636 if (curr_level_num != test_level_num) {
637 children_ok &= wait_for_pids();
638 curr_level_num = test_level_num;
639 }
640
641 snprintf(buf, TEST_PATH_LEN, "%s/%s", fname,
642 namelist[i]->d_name);
643 if (is_directory(buf)) {
644 pid = fork();
645 if (pid == 0) {
646 if (setpgrp() < 0) {
647 perror("setpgid");
648 }
649 // spawn a new copy of ourself.
650 execl(progname, progname, buf, NULL);
651
652 perror(progname);
653 exit(-1);
654 }
655 } else {
656 pid = spawn_test(buf);
657 }
658
659 if (pid < 0) {
660 perror("fork");
661 result |= 1;
662 free(namelist[i]);
663 continue;
664 }
665
666 note_child(pid, buf, namelist[i]->d_name[0]);
667
668 free(namelist[i]);
669 }
670 free(namelist);
671
672 /* wait for remaining runners */
673 children_ok &= wait_for_pids();
674 if (children_ok == 0) {
675 result |= 2;
676 }
677
678 pounder_fprintf(stdout, "%s: Leaving directory.\n", fname);
679
680 return result;
681 }
682