#include <math.h>
#include "json.h"
#include "idletime.h"

static volatile struct idle_prof_common ipc;

/*
 * Get the time to complete a unit of work on a particular cpu.
 * The minimum over CALIBRATE_RUNS runs is returned.
 */
static double calibrate_unit(unsigned char *data)
{
	unsigned long t, i, j, k;
	struct timeval tps;
	double tunit = 0.0;

	for (i = 0; i < CALIBRATE_RUNS; i++) {

		fio_gettime(&tps, NULL);
		/* scale up for less variance */
		for (j = 0; j < CALIBRATE_SCALE; j++) {
			/* unit of work */
			for (k = 0; k < page_size; k++) {
				data[(k + j) % page_size] = k % 256;
				/*
				 * we won't see STOP here; this check only
				 * matches the same statement in the
				 * profiling loop.
				 */
				if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
					return 0.0;
			}
		}

		t = utime_since_now(&tps);
		if (!t)
			continue;

		/* keep the minimum time to complete CALIBRATE_SCALE units */
		if ((i == 0) || ((double)t < tunit))
			tunit = (double)t;
	}

	return tunit / CALIBRATE_SCALE;
}

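/*
 * Pin the calling thread to the cpu recorded in ipt->cpu, so that each
 * profiling thread measures exactly one cpu.
 */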
static int set_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	os_cpu_mask_t cpu_mask;

	memset(&cpu_mask, 0, sizeof(cpu_mask));
	fio_cpu_set(&cpu_mask, ipt->cpu);

	if (fio_setaffinity(gettid(), cpu_mask)) {
		log_err("fio: fio_setaffinity failed\n");
		return -1;
	}

	return 0;
#else
	log_err("fio: fio_setaffinity not supported\n");
	return -1;
#endif
}

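/*
 * Per-cpu profiling thread: pin to its cpu, calibrate the unit of work,
 * switch to the IDLE scheduling class, then spin on the unit of work until
 * told to stop.  The number of completed loops, combined with the
 * calibrated unit time, later yields the cpu's idleness.
 */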
static void *idle_prof_thread_fn(void *data)
{
	int retval;
	unsigned long j, k;
	struct idle_prof_thread *ipt = data;

	/* wait until all threads are spawned */
	pthread_mutex_lock(&ipt->init_lock);

	/* exit if any other thread failed to start */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	retval = set_cpu_affinity(ipt);
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->cali_time = calibrate_unit(ipt->data);

	/* delay setting the IDLE class until now for better calibration accuracy */
#if defined(CONFIG_SCHED_IDLE)
	if ((retval = fio_set_sched_idle()))
		log_err("fio: fio_set_sched_idle failed\n");
#else
	retval = -1;
	log_err("fio: fio_set_sched_idle not supported\n");
#endif
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->state = TD_INITIALIZED;

	/* signal the main thread that calibration is done */
	pthread_cond_signal(&ipt->cond);
	pthread_mutex_unlock(&ipt->init_lock);

	/* wait for the other threads to finish calibration */
	pthread_mutex_lock(&ipt->start_lock);

	/* exit if other threads failed to initialize */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->start_lock);
		return NULL;
	}

	/* exit if we are doing calibration only */
	if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) {
		pthread_mutex_unlock(&ipt->start_lock);
		return NULL;
	}

	fio_gettime(&ipt->tps, NULL);
	ipt->state = TD_RUNNING;

	j = 0;
	while (1) {
		for (k = 0; k < page_size; k++) {
			ipt->data[(k + j) % page_size] = k % 256;
			if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) {
				fio_gettime(&ipt->tpe, NULL);
				goto idle_prof_done;
			}
		}
		j++;
	}

idle_prof_done:

	ipt->loops = j + (double) k / page_size;
	ipt->state = TD_EXITED;
	pthread_mutex_unlock(&ipt->start_lock);

	return NULL;
}

/* calculate the mean and standard deviation of the time to complete a unit of work */
static void calibration_stats(void)
{
	int i;
	double sum = 0.0, var = 0.0;
	struct idle_prof_thread *ipt;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		sum += ipt->cali_time;
	}

	ipc.cali_mean = sum / ipc.nr_cpus;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		var += pow(ipt->cali_time - ipc.cali_mean, 2);
	}

	ipc.cali_stddev = sqrt(var / (ipc.nr_cpus - 1));
}

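/*
 * Spawn one profiling thread per online cpu and wait for all of them to
 * finish calibration.  Any single failure aborts the whole run, since the
 * results are only meaningful when every cpu is covered.
 */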
void fio_idle_prof_init(void)
{
	int i, ret;
	struct timeval tp;
	struct timespec ts;
	pthread_attr_t tattr;
	struct idle_prof_thread *ipt;

	ipc.nr_cpus = cpus_online();
	ipc.status = IDLE_PROF_STATUS_OK;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if ((ret = pthread_attr_init(&tattr))) {
		log_err("fio: pthread_attr_init %s\n", strerror(ret));
		return;
	}
	if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) {
		log_err("fio: pthread_attr_setscope %s\n", strerror(ret));
		return;
	}

	ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread));
	if (!ipc.ipts) {
		log_err("fio: malloc failed\n");
		return;
	}

	ipc.buf = malloc(ipc.nr_cpus * page_size);
	if (!ipc.buf) {
		log_err("fio: malloc failed\n");
		free(ipc.ipts);
		return;
	}

	/*
	 * profiling aborts on any single thread failure, since the
	 * result won't be accurate if any cpu is left unused.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];

		ipt->cpu = i;
		ipt->state = TD_NOT_CREATED;
		ipt->data = (unsigned char *)(ipc.buf + page_size * i);

		if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_cond_init(&ipt->cond, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_cond_init %s\n", strerror(ret));
			break;
		}

		/* make sure all threads are spawned before they start */
		pthread_mutex_lock(&ipt->init_lock);

		/* make sure all threads finish init before profiling starts */
		pthread_mutex_lock(&ipt->start_lock);

		if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_create %s\n", strerror(ret));
			break;
		} else
			ipt->state = TD_CREATED;

		if ((ret = pthread_detach(ipt->thread))) {
			/* log the error and let the thread spin */
			log_err("fio: pthread_detach %s\n", strerror(ret));
		}
	}

	/*
	 * let the good threads continue, so that they can exit
	 * if errors occurred on other threads earlier.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->init_lock);
	}

	if (ipc.status == IDLE_PROF_STATUS_ABORT)
		return;

	/* wait for calibration to finish */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->init_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_INITIALIZED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->init_lock);

		/*
		 * if any thread failed to initialize, the other threads are
		 * aborted later, after fio_idle_prof_start.
		 */
		if (ipt->state == TD_EXITED)
			ipc.status = IDLE_PROF_STATUS_ABORT;
	}

	if (ipc.status != IDLE_PROF_STATUS_ABORT)
		calibration_stats();
	else
		ipc.cali_mean = ipc.cali_stddev = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		ipc.status = IDLE_PROF_STATUS_CALI_STOP;
}

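/*
 * Release the start_lock of every profiling thread so that they all begin
 * (or bail out, if an abort was flagged) at roughly the same time.
 */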
void fio_idle_prof_start(void)
{
	int i;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	/* unlock regardless of whether abort is set */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->start_lock);
	}
}

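/*
 * Flag the profiling threads to stop, wait for them to exit, and compute
 * each cpu's idleness as loops * cali_mean / runtime.
 */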
void fio_idle_prof_stop(void)
{
	int i;
	uint64_t runt;
	struct timeval tp;
	struct timespec ts;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		return;

	ipc.status = IDLE_PROF_STATUS_PROF_STOP;

	/* wait for all threads to exit from profiling */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->start_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_NOT_CREATED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			/* timed wait in case a signal is not received */
			pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->start_lock);

		/* calculate idleness */
		if (ipc.cali_mean != 0.0) {
			runt = utime_since(&ipt->tps, &ipt->tpe);
			if (runt)
				ipt->idleness = ipt->loops * ipc.cali_mean / runt;
			else
				ipt->idleness = 0.0;
		} else
			ipt->idleness = 0.0;
	}

	/*
	 * memory allocations are freed via an explicit fio_idle_prof_cleanup()
	 * call after the profiling stats have been collected by apps.
	 */
}

/*
 * return the system-wide idle percentage when cpu is -1;
 * return the idle percentage of a single cpu otherwise.
 */
static double fio_idle_prof_cpu_stat(int cpu)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct idle_prof_thread *ipt;
	double p = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return 0.0;

	if ((cpu >= nr_cpus) || (cpu < -1)) {
		log_err("fio: idle profiling invalid cpu index\n");
		return 0.0;
	}

	if (cpu == -1) {
		for (i = 0; i < nr_cpus; i++) {
			ipt = &ipc.ipts[i];
			p += ipt->idleness;
		}
		p /= nr_cpus;
	} else {
		ipt = &ipc.ipts[cpu];
		p = ipt->idleness;
	}

	return p * 100.0;
}

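/* free the per-thread state and the profiling buffers */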
static void fio_idle_prof_cleanup(void)
{
	if (ipc.ipts) {
		free(ipc.ipts);
		ipc.ipts = NULL;
	}

	if (ipc.buf) {
		free(ipc.buf);
		ipc.buf = NULL;
	}
}

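/*
 * Parse the idle-prof option string.  "calibrate" runs a calibration-only
 * pass and prints the result; "system" and "percpu" select the reporting
 * granularity for a normal run.
 */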
int fio_idle_prof_parse_opt(const char *args)
{
	ipc.opt = IDLE_PROF_OPT_NONE; /* default */

	if (!args) {
		log_err("fio: empty idle-prof option string\n");
		return -1;
	}

#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE)
	if (strcmp("calibrate", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_CALI;
		fio_idle_prof_init();
		fio_idle_prof_start();
		fio_idle_prof_stop();
		show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
		return 1;
	} else if (strcmp("system", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_SYSTEM;
		return 0;
	} else if (strcmp("percpu", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_PERCPU;
		return 0;
	} else {
		log_err("fio: incorrect idle-prof option: %s\n", args);
		return -1;
	}
#else
	log_err("fio: idle-prof not supported on this platform\n");
	return -1;
#endif
}

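/*
 * Emit the idleness results either as human-readable text or into the
 * JSON output object, then free the profiling state.
 */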
void show_idle_prof_stats(int output, struct json_object *parent)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct json_object *tmp;
	char s[MAX_CPU_STR_LEN];

	if (output == FIO_OUTPUT_NORMAL) {
		if (ipc.opt > IDLE_PROF_OPT_CALI)
			log_info("\nCPU idleness:\n");
		else if (ipc.opt == IDLE_PROF_OPT_CALI)
			log_info("CPU idleness:\n");

		if (ipc.opt >= IDLE_PROF_OPT_SYSTEM)
			log_info("  system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			log_info("  percpu: %3.2f%%", fio_idle_prof_cpu_stat(0));
			for (i = 1; i < nr_cpus; i++)
				log_info(", %3.2f%%", fio_idle_prof_cpu_stat(i));
			log_info("\n");
		}

		if (ipc.opt >= IDLE_PROF_OPT_CALI) {
			log_info("  unit work: mean=%3.2fus,", ipc.cali_mean);
			log_info(" stddev=%3.2f\n", ipc.cali_stddev);
		}

		/* dynamic mem allocations can now be freed */
		if (ipc.opt != IDLE_PROF_OPT_NONE)
			fio_idle_prof_cleanup();

		return;
	}

	if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output == FIO_OUTPUT_JSON)) {
		if (!parent)
			return;

		tmp = json_create_object();
		if (!tmp)
			return;

		json_object_add_value_object(parent, "cpu_idleness", tmp);
		json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			for (i = 0; i < nr_cpus; i++) {
				snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i);
				json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i));
			}
		}

		json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean);
		json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev);

		fio_idle_prof_cleanup();
	}
}