/* Identification/version string for this source file. */
char netcpu_looper_id[] =
  "@(#)netcpu_looper.c (c) Copyright 2005-2007. Version 2.4.3";
3
4 /* netcpu_looper.c
5
6 Implement the soaker process specific portions of netperf CPU
7 utilization measurements. These are broken-out into a separate file
8 to make life much nicer over in netlib.c which had become a maze of
9 twisty, CPU-util-related, #ifdefs, all different. raj 2005-01-26
10 */
11
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif
#if HAVE_UNISTD_H
# include <unistd.h>
#endif
#if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H)
# include <sys/mman.h>
#else
# error netcpu_looper requires mmap
#endif

#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
#else
# if HAVE_SYS_TIME_H
#  include <sys/time.h>
# else
#  include <time.h>
# endif
#endif

#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif

/* fchmod() */
#if HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif

#if HAVE_SYS_WAIT_H
# include <sys/wait.h>
#endif

#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

#include "netsh.h"
#include "netlib.h"
59
60 #define PAGES_PER_CHILD 2
61
62 /* the lib_start_count and lib_end_count arrays hold the starting
63 and ending values of whatever is counting when the system is
64 idle. The rate at which this increments during a test is compared
65 with a previous calibrarion to arrive at a CPU utilization
66 percentage. raj 2005-01-26 */
67 static uint64_t lib_start_count[MAXCPUS];
68 static uint64_t lib_end_count[MAXCPUS];
69
70 static int *cpu_mappings;
71
72 static int lib_idle_fd;
73 static uint64_t *lib_idle_address[MAXCPUS];
74 static long *lib_base_pointer;
75 static pid_t lib_idle_pids[MAXCPUS];
76 static int lib_loopers_running=0;
77
78 /* we used to use this code to bind the loopers, but since we have
79 decided to enable processor affinity for the actual
80 netperf/netserver processes we will use that affinity routine,
81 which happens to know about more systems than this */
82
#ifdef NOTDEF

/* the platform headers used to be pulled in from inside the function
   body; system headers belong at file scope */
#if defined(__hpux)
# include <sys/syscall.h>
# include <sys/mp.h>
#elif defined(__sun) && defined(__SVR4)
/* should only be Solaris */
# include <sys/processor.h>
# include <sys/procset.h>
#endif

/* Bind the calling looper to a processor.  We are not choosy as to
   which one: it is child_num modulo the number of local CPUs.  On
   systems where we do not know how to bind a process to a processor
   this routine is a no-op. raj 10/95

   This is *only* for the looper processes, not the actual
   measurement processes, which float or not as controlled by the
   operating system defaults. raj 12/95

   child_num - index of the looper asking to be bound */
static void
bind_to_processor(int child_num)
{
  int target_cpu = child_num % lib_num_loc_cpus;

#if defined(__hpux)

  if (debug) {
    fprintf(where,
            "child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            target_cpu,
            (int)getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  SETPROCESS(target_cpu, getpid());

#elif defined(__sun) && defined(__SVR4)

  int old_binding;

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            target_cpu,
            (int)getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  /* a failed bind is reported but is not fatal */
  if (processor_bind(P_PID,
                     getpid(),
                     target_cpu,
                     &old_binding) != 0) {
    fprintf(where,"bind_to_processor: unable to perform processor binding\n");
    fprintf(where,"                   errno %d\n",errno);
    fflush(where);
  }

#elif defined(WIN32)

  if (!SetThreadAffinityMask(GetCurrentThread(),
                             (ULONG_PTR)1 << target_cpu)) {
    perror("SetThreadAffinityMask failed");
    fflush(stderr);
  }

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d of %d CPUs\n",
            child_num,
            target_cpu,
            lib_num_loc_cpus);
    fflush(where);
  }

#else

  /* no known way to bind on this platform */
  (void)target_cpu;

#endif
}
#endif /* NOTDEF */
168
169 /* sit_and_spin will just spin about incrementing a value */
170 /* this value will either be in a memory mapped region on Unix shared */
171 /* by each looper process, or something appropriate on Windows/NT */
172 /* (malloc'd or such). This routine is reasonably ugly in that it has */
173 /* priority manipulating code for lots of different operating */
174 /* systems. This routine never returns. raj 1/96 */
175
176 static void
sit_and_spin(int child_index)177 sit_and_spin(int child_index)
178
179 {
180 uint64_t *my_counter_ptr;
181
182 /* only use C stuff if we are not WIN32 unless and until we */
183 /* switch from CreateThread to _beginthread. raj 1/96 */
184 #ifndef WIN32
185 /* we are the child. we could decide to exec some separate */
186 /* program, but that doesn't really seem worthwhile - raj 4/95 */
187 if (debug > 1) {
188 fprintf(where,
189 "Looper child %d is born, pid %d\n",
190 child_index,
191 getpid());
192 fflush(where);
193 }
194
195 #endif /* WIN32 */
196
197 /* reset our base pointer to be at the appropriate offset */
198 my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer +
199 (netlib_get_page_size() *
200 PAGES_PER_CHILD * child_index));
201
202 /* in the event we are running on an MP system, it would */
203 /* probably be good to bind the soaker processes to specific */
204 /* processors. I *think* this is the most reasonable thing to */
205 /* do, and would be closes to simulating the information we get */
206 /* on HP-UX with pstat. I could put all the system-specific code */
207 /* here, but will "abstract it into another routine to keep this */
208 /* area more readable. I'll probably do the same thine with the */
209 /* "low pri code" raj 10/95 */
210
211 /* since we are "flying blind" wrt where we should bind the looper
212 processes, we want to use the cpu_map that was prepared by netlib
213 rather than assume that the CPU ids on the system start at zero
214 and are contiguous. raj 2006-04-03 */
215 bind_to_specific_processor(child_index % lib_num_loc_cpus,1);
216
217 for (*my_counter_ptr = 0L;
218 ;
219 (*my_counter_ptr)++) {
220 if (!(*lib_base_pointer % 1)) {
221 /* every once and again, make sure that our process priority is */
222 /* nice and low. also, by making system calls, it may be easier */
223 /* for us to be pre-empted by something that needs to do useful */
224 /* work - like the thread of execution actually sending and */
225 /* receiving data across the network :) */
226 #ifdef _AIX
227 int pid,prio;
228
229 prio = PRIORITY;
230 pid = getpid();
231 /* if you are not root, this call will return EPERM - why one */
232 /* cannot change one's own priority to lower value is beyond */
233 /* me. raj 2/26/96 */
234 setpri(pid, prio);
235 #else /* _AIX */
236 #ifdef __sgi
237 int pid,prio;
238
239 prio = PRIORITY;
240 pid = getpid();
241 schedctl(NDPRI, pid, prio);
242 sginap(0);
243 #else /* __sgi */
244 #ifdef WIN32
245 SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_IDLE);
246 #else /* WIN32 */
247 #if defined(__sun) && defined(__SVR4)
248 #include <sys/types.h>
249 #include <sys/priocntl.h>
250 #include <sys/rtpriocntl.h>
251 #include <sys/tspriocntl.h>
252 /* I would *really* like to know how to use priocntl to make the */
253 /* priority low for this looper process. however, either my mind */
254 /* is addled, or the manpage in section two for priocntl is not */
255 /* terribly helpful - for one, it has no examples :( so, if you */
256 /* can help, I'd love to hear from you. in the meantime, we will */
257 /* rely on nice(39). raj 2/26/96 */
258 nice(39);
259 #else /* __sun && __SVR4 */
260 nice(39);
261 #endif /* __sun && _SVR4 */
262 #endif /* WIN32 */
263 #endif /* __sgi */
264 #endif /* _AIX */
265 }
266 }
267 }
268
269
270
271 /* this routine will start all the looper processes or threads for */
272 /* measuring CPU utilization. */
273
274 static void
start_looper_processes()275 start_looper_processes()
276 {
277
278 unsigned int i, file_size;
279
280 /* we want at least two pages for each processor. the */
281 /* child for any one processor will write to the first of his two */
282 /* pages, and the second page will be a buffer in case there is page */
283 /* prefetching. if your system pre-fetches more than a single page, */
284 /* well, you'll have to modify this or live with it :( raj 4/95 */
285
286 file_size = ((netlib_get_page_size() * PAGES_PER_CHILD) *
287 lib_num_loc_cpus);
288
289 #ifndef WIN32
290
291 /* we we are not using WINDOWS NT (or 95 actually :), then we want */
292 /* to create a memory mapped region so we can see all the counting */
293 /* rates of the loopers */
294
295 /* could we just use an anonymous memory region for this? it is */
296 /* possible that using a mmap()'ed "real" file, while convenient for */
297 /* debugging, could result in some filesystem activity - like */
298 /* metadata updates? raj 4/96 */
299 lib_idle_fd = open("/tmp/netperf_cpu",O_RDWR | O_CREAT | O_EXCL);
300
301 if (lib_idle_fd == -1) {
302 fprintf(where,"create_looper: file creation; errno %d\n",errno);
303 fflush(where);
304 exit(1);
305 }
306
307 if (chmod("/tmp/netperf_cpu",0644) == -1) {
308 fprintf(where,"create_looper: chmod; errno %d\n",errno);
309 fflush(where);
310 exit(1);
311 }
312
313 /* with the file descriptor in place, lets be sure that the file is */
314 /* large enough. */
315
316 if (truncate("/tmp/netperf_cpu",file_size) == -1) {
317 fprintf(where,"create_looper: truncate: errno %d\n",errno);
318 fflush(where);
319 exit(1);
320 }
321
322 /* the file should be large enough now, so we can mmap it */
323
324 /* if the system does not have MAP_VARIABLE, just define it to */
325 /* be zero. it is only used/needed on HP-UX (?) raj 4/95 */
326 #ifndef MAP_VARIABLE
327 #define MAP_VARIABLE 0x0000
328 #endif /* MAP_VARIABLE */
329 #ifndef MAP_FILE
330 #define MAP_FILE 0x0000
331 #endif /* MAP_FILE */
332 if ((lib_base_pointer = (long *)mmap(NULL,
333 file_size,
334 PROT_READ | PROT_WRITE,
335 MAP_FILE | MAP_SHARED | MAP_VARIABLE,
336 lib_idle_fd,
337 0)) == (long *)-1) {
338 fprintf(where,"create_looper: mmap: errno %d\n",errno);
339 fflush(where);
340 exit(1);
341 }
342
343
344 if (debug > 1) {
345 fprintf(where,"num CPUs %d, file_size %d, lib_base_pointer %p\n",
346 lib_num_loc_cpus,
347 file_size,
348 lib_base_pointer);
349 fflush(where);
350 }
351
352 /* we should have a valid base pointer. lets fork */
353
354 for (i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
355 switch (lib_idle_pids[i] = fork()) {
356 case -1:
357 perror("netperf: fork");
358 exit(1);
359 case 0:
360 /* we are the child. we could decide to exec some separate */
361 /* program, but that doesn't really seem worthwhile - raj 4/95 */
362
363 signal(SIGTERM, SIG_DFL);
364 sit_and_spin(i);
365
366 /* we should never really get here, but if we do, just exit(0) */
367 exit(0);
368 break;
369 default:
370 /* we must be the parent */
371 lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer +
372 (netlib_get_page_size() *
373 PAGES_PER_CHILD * i));
374 if (debug) {
375 fprintf(where,"lib_idle_address[%d] is %p\n",
376 i,
377 lib_idle_address[i]);
378 fflush(where);
379 }
380 }
381 }
382 #else
383 /* we are compiled -DWIN32 */
384 if ((lib_base_pointer = malloc(file_size)) == NULL) {
385 fprintf(where,
386 "create_looper_process could not malloc %d bytes\n",
387 file_size);
388 fflush(where);
389 exit(1);
390 }
391
392 /* now, create all the threads */
393 for(i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
394 long place_holder;
395 if ((lib_idle_pids[i] = CreateThread(0,
396 0,
397 (LPTHREAD_START_ROUTINE)sit_and_spin,
398 (LPVOID)(ULONG_PTR)i,
399 0,
400 &place_holder)) == NULL ) {
401 fprintf(where,
402 "create_looper_process: CreateThread failed\n");
403 fflush(where);
404 /* I wonder if I need to look for other threads to kill? */
405 exit(1);
406 }
407 lib_idle_address[i] = (long *) ((char *)lib_base_pointer +
408 (netlib_get_page_size() *
409 PAGES_PER_CHILD * i));
410 if (debug) {
411 fprintf(where,"lib_idle_address[%d] is %p\n",
412 i,
413 lib_idle_address[i]);
414 fflush(where);
415 }
416 }
417 #endif /* WIN32 */
418
419 /* we need to have the looper processes settled-in before we do */
420 /* anything with them, so lets sleep for say 30 seconds. raj 4/95 */
421
422 sleep(30);
423 }
424
425 void
cpu_util_init(void)426 cpu_util_init(void)
427 {
428 cpu_method = LOOPER;
429
430 /* we want to get the looper processes going */
431 if (!lib_loopers_running) {
432 start_looper_processes();
433 lib_loopers_running = 1;
434 }
435
436 return;
437 }
438
439 /* clean-up any left-over CPU util resources - looper processes,
440 files, whatever. raj 2005-01-26 */
441 void
cpu_util_terminate()442 cpu_util_terminate() {
443
444 #ifdef WIN32
445 /* it would seem that if/when the process exits, all the threads */
446 /* will go away too, so I don't think I need any explicit thread */
447 /* killing calls here. raj 1/96 */
448 #else
449
450 int i;
451
452 /* now go through and kill-off all the child processes */
453 for (i = 0; i < lib_num_loc_cpus; i++){
454 /* SIGKILL can leave core files behind - thanks to Steinar Haug */
455 /* for pointing that out. */
456 kill(lib_idle_pids[i],SIGTERM);
457 }
458 lib_loopers_running = 0;
459 /* reap the children */
460 while(waitpid(-1, NULL, WNOHANG) > 0) { }
461
462 /* finally, unlink the mmaped file */
463 munmap((caddr_t)lib_base_pointer,
464 ((netlib_get_page_size() * PAGES_PER_CHILD) *
465 lib_num_loc_cpus));
466 unlink("/tmp/netperf_cpu");
467 #endif
468 return;
469 }
470
471 int
get_cpu_method(void)472 get_cpu_method(void)
473 {
474 return LOOPER;
475 }
476
477 /* calibrate_looper */
478
479 /* Loop a number of iterations, sleeping interval seconds each and */
480 /* count how high the idle counter gets each time. Return the */
481 /* measured cpu rate to the calling routine. raj 4/95 */
482
483 float
calibrate_idle_rate(int iterations,int interval)484 calibrate_idle_rate (int iterations, int interval)
485 {
486
487 uint64_t
488 firstcnt[MAXCPUS],
489 secondcnt[MAXCPUS];
490
491 float
492 elapsed,
493 temp_rate,
494 rate[MAXTIMES],
495 local_maxrate;
496
497 long
498 sec,
499 usec;
500
501 int
502 i,
503 j;
504
505 struct timeval time1, time2 ;
506 struct timezone tz;
507
508 if (iterations > MAXTIMES) {
509 iterations = MAXTIMES;
510 }
511
512 local_maxrate = (float)-1.0;
513
514 for(i = 0; i < iterations; i++) {
515 rate[i] = (float)0.0;
516 for (j = 0; j < lib_num_loc_cpus; j++) {
517 firstcnt[j] = *(lib_idle_address[j]);
518 }
519 gettimeofday (&time1, &tz);
520 sleep(interval);
521 gettimeofday (&time2, &tz);
522
523 if (time2.tv_usec < time1.tv_usec)
524 {
525 time2.tv_usec += 1000000;
526 time2.tv_sec -=1;
527 }
528 sec = time2.tv_sec - time1.tv_sec;
529 usec = time2.tv_usec - time1.tv_usec;
530 elapsed = (float)sec + ((float)usec/(float)1000000.0);
531
532 if(debug) {
533 fprintf(where, "Calibration for counter run: %d\n",i);
534 fprintf(where,"\tsec = %ld usec = %ld\n",sec,usec);
535 fprintf(where,"\telapsed time = %g\n",elapsed);
536 }
537
538 for (j = 0; j < lib_num_loc_cpus; j++) {
539 secondcnt[j] = *(lib_idle_address[j]);
540 if(debug) {
541 /* I know that there are situations where compilers know about */
542 /* long long, but the library fucntions do not... raj 4/95 */
543 fprintf(where,
544 "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n",
545 j,
546 (uint32_t)(firstcnt[j]>>32),
547 (uint32_t)(firstcnt[j]&0xffffffff),
548 j,
549 (uint32_t)(secondcnt[j]>>32),
550 (uint32_t)(secondcnt[j]&0xffffffff));
551 }
552 /* we assume that it would wrap no more than once. we also */
553 /* assume that the result of subtracting will "fit" raj 4/95 */
554 temp_rate = (secondcnt[j] >= firstcnt[j]) ?
555 (float)(secondcnt[j] - firstcnt[j])/elapsed :
556 (float)(secondcnt[j]-firstcnt[j]+MAXLONG)/elapsed;
557 if (temp_rate > rate[i]) rate[i] = temp_rate;
558 if(debug) {
559 fprintf(where,"\trate[%d] = %g\n",i,rate[i]);
560 fflush(where);
561 }
562 if (local_maxrate < rate[i]) local_maxrate = rate[i];
563 }
564 }
565 if(debug) {
566 fprintf(where,"\tlocal maxrate = %g per sec. \n",local_maxrate);
567 fflush(where);
568 }
569 return local_maxrate;
570 }
571
572
573 void
get_cpu_idle(uint64_t * res)574 get_cpu_idle (uint64_t *res)
575 {
576 int i;
577
578 for (i = 0; i < lib_num_loc_cpus; i++){
579 res[i] = *lib_idle_address[i];
580 }
581
582 }
583
584 float
calc_cpu_util_internal(float elapsed_time)585 calc_cpu_util_internal(float elapsed_time)
586 {
587 int i;
588 float correction_factor;
589 float actual_rate;
590
591 lib_local_cpu_util = (float)0.0;
592 /* It is possible that the library measured a time other than */
593 /* the one that the user want for the cpu utilization */
594 /* calculations - for example, tests that were ended by */
595 /* watchdog timers such as the udp stream test. We let these */
596 /* tests tell up what the elapsed time should be. */
597
598 if (elapsed_time != 0.0) {
599 correction_factor = (float) 1.0 +
600 ((lib_elapsed - elapsed_time) / elapsed_time);
601 }
602 else {
603 correction_factor = (float) 1.0;
604 }
605
606 for (i = 0; i < lib_num_loc_cpus; i++) {
607
608 /* it would appear that on some systems, in loopback, nice is
609 *very* effective, causing the looper process to stop dead in its
610 tracks. if this happens, we need to ensure that the calculation
611 does not go south. raj 6/95 and if we run completely out of idle,
612 the same thing could in theory happen to the USE_KSTAT path. raj
613 8/2000 */
614
615 if (lib_end_count[i] == lib_start_count[i]) {
616 lib_end_count[i]++;
617 }
618
619 actual_rate = (lib_end_count[i] > lib_start_count[i]) ?
620 (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed :
621 (float)(lib_end_count[i] - lib_start_count[i] +
622 MAXLONG)/ lib_elapsed;
623 if (debug) {
624 fprintf(where,
625 "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n",
626 i,
627 actual_rate,
628 (uint32_t)(lib_start_count[i]>>32),
629 (uint32_t)(lib_start_count[i]&0xffffffff),
630 (uint32_t)(lib_end_count[i]>>32),
631 (uint32_t)(lib_end_count[i]&0xffffffff));
632 }
633 lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) /
634 lib_local_maxrate * 100;
635 lib_local_cpu_util += lib_local_per_cpu_util[i];
636 }
637 /* we want the average across all n processors */
638 lib_local_cpu_util /= (float)lib_num_loc_cpus;
639
640 lib_local_cpu_util *= correction_factor;
641 return lib_local_cpu_util;
642
643
644 }
645 void
cpu_start_internal(void)646 cpu_start_internal(void)
647 {
648 get_cpu_idle(lib_start_count);
649 return;
650 }
651
652 void
cpu_stop_internal(void)653 cpu_stop_internal(void)
654 {
655 get_cpu_idle(lib_end_count);
656 }
657