// SPDX-License-Identifier: GPL-2.0
/*
 * Memory bandwidth monitoring and allocation library
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include "resctrl.h"

#define UNCORE_IMC		"uncore_imc"
#define READ_FILE_NAME		"events/cas_count_read"
#define WRITE_FILE_NAME		"events/cas_count_write"
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
#define SCALE			0.00006103515625
#define MAX_IMCS		20
#define MAX_TOKENS		5
#define READ			0
#define WRITE			1

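/*
 * Note on SCALE above: 0.00006103515625 is 64 / 2^20, which is consistent
 * with each CAS transaction moving one 64-byte cache line and the summed
 * bandwidth being reported in MiB. The derivation here is an editorial
 * note; the value itself matches the "scale" the uncore_imc cas_count
 * events advertise in sysfs.
 */
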
#define CON_MBM_LOCAL_BYTES_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

struct membw_read_format {
	__u64 value;         /* The value of the event */
	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	__u64 id;            /* if PERF_FORMAT_ID */
};
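
/*
 * Editorial note on the layout above: per perf_event_open(2), read() on a
 * perf event fd returns only the fields whose PERF_FORMAT_* bits were
 * requested in attr.read_format. This code requests TOTAL_TIME_ENABLED and
 * TOTAL_TIME_RUNNING but not PERF_FORMAT_ID, so the trailing id member is
 * never filled in and the reads below return fewer bytes than
 * sizeof(struct membw_read_format), which is still treated as success.
 */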

struct imc_counter_config {
	__u32 type;
	__u64 event;
	__u64 umask;
	struct perf_event_attr pe;
	struct membw_read_format return_value;
	int fd;
};

static char mbm_total_path[1024];
static int imcs;
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
static const struct resctrl_test *current_test;

void membw_initialize_perf_event_attr(int i, int j)
{
	memset(&imc_counters_config[i][j].pe, 0,
	       sizeof(struct perf_event_attr));
	imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
	imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
	imc_counters_config[i][j].pe.disabled = 1;
	imc_counters_config[i][j].pe.inherit = 1;
	imc_counters_config[i][j].pe.exclude_guest = 0;
	imc_counters_config[i][j].pe.config =
		imc_counters_config[i][j].umask << 8 |
		imc_counters_config[i][j].event;
	imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
	imc_counters_config[i][j].pe.read_format =
		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
}
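
/*
 * Sketch of the raw config encoding built above, assuming the usual Intel
 * uncore PMU layout with the event select in bits 7:0 and the umask in
 * bits 15:8 of perf_event_attr.config:
 *
 *	pe.config = (umask << 8) | event;
 *
 * e.g. a cas_count_read description of "event=0x04,umask=0x03" (an assumed,
 * illustrative value; the real one is read from sysfs at runtime) would
 * encode to 0x0304.
 */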

void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
{
	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
}

void membw_ioctl_perf_event_ioc_disable(int i, int j)
{
	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
}

/*
 * get_event_and_umask:	Parse config into event and umask
 * @cas_count_cfg:	Config
 * @count:		iMC number
 * @op:			Operation (read/write)
 */
void get_event_and_umask(char *cas_count_cfg, int count, bool op)
{
	char *token[MAX_TOKENS];
	int i = 0;

	token[0] = strtok(cas_count_cfg, "=,");

	for (i = 1; i < MAX_TOKENS; i++)
		token[i] = strtok(NULL, "=,");

	for (i = 0; i < MAX_TOKENS - 1; i++) {
		if (!token[i])
			break;
		if (strcmp(token[i], "event") == 0) {
			if (op == READ)
				imc_counters_config[count][READ].event =
				strtol(token[i + 1], NULL, 16);
			else
				imc_counters_config[count][WRITE].event =
				strtol(token[i + 1], NULL, 16);
		}
		if (strcmp(token[i], "umask") == 0) {
			if (op == READ)
				imc_counters_config[count][READ].umask =
				strtol(token[i + 1], NULL, 16);
			else
				imc_counters_config[count][WRITE].umask =
				strtol(token[i + 1], NULL, 16);
		}
	}
}
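
/*
 * Illustrative input for the parser above (an assumed, typical value; the
 * real string comes from the cas_count_read/cas_count_write sysfs files):
 * given "event=0x04,umask=0x03", strtok() splits it on "=," into the tokens
 * { "event", "0x04", "umask", "0x03" } and the hex values following "event"
 * and "umask" are stored via strtol(..., 16).
 */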

static int open_perf_event(int i, int cpu_no, int j)
{
	imc_counters_config[i][j].fd =
		perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
				PERF_FLAG_FD_CLOEXEC);

	if (imc_counters_config[i][j].fd == -1) {
		fprintf(stderr, "Error opening leader %llx\n",
			imc_counters_config[i][j].pe.config);

		return -1;
	}

	return 0;
}

/* Get type and config (read and write) of an iMC counter */
static int read_from_imc_dir(char *imc_dir, int count)
{
	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
	FILE *fp;

	/* Get type of iMC counter */
	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
	fp = fopen(imc_counter_type, "r");
	if (!fp) {
		ksft_perror("Failed to open iMC counter type file");

		return -1;
	}
	if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
		ksft_perror("Could not get iMC type");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	imc_counters_config[count][WRITE].type =
				imc_counters_config[count][READ].type;

	/* Get read config */
	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
	fp = fopen(imc_counter_cfg, "r");
	if (!fp) {
		ksft_perror("Failed to open iMC config file");

		return -1;
	}
	if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) {
		ksft_perror("Could not get iMC cas count read");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	get_event_and_umask(cas_count_cfg, count, READ);

	/* Get write config */
	sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
	fp = fopen(imc_counter_cfg, "r");
	if (!fp) {
		ksft_perror("Failed to open iMC config file");

		return -1;
	}
	if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) {
		ksft_perror("Could not get iMC cas count write");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	get_event_and_umask(cas_count_cfg, count, WRITE);

	return 0;
}

/*
 * A system can have 'n' iMC (Integrated Memory Controller) counters; get
 * that 'n'. For each iMC counter get its type and config. Each counter has
 * two configs, one for read and the other for write, and a config in turn
 * has two parts, event and umask.
 * Enumerate all these details into an array of structures.
 *
 * Return: >= 0 on success. < 0 on failure.
 */
static int num_of_imcs(void)
{
	char imc_dir[512], *temp;
	unsigned int count = 0;
	struct dirent *ep;
	int ret;
	DIR *dp;

	dp = opendir(DYN_PMU_PATH);
	if (dp) {
		while ((ep = readdir(dp))) {
			temp = strstr(ep->d_name, UNCORE_IMC);
			if (!temp)
				continue;

			/*
			 * iMC counters are named "uncore_imc_<n>", so advance
			 * the pointer to point at <n>. Note that
			 * sizeof(UNCORE_IMC) also counts the terminating null
			 * character, which conveniently skips the trailing
			 * underscore in "uncore_imc_".
			 */
			temp = temp + sizeof(UNCORE_IMC);

			/*
			 * Some directories under "DYN_PMU_PATH" could have
			 * names like "uncore_imc_free_running", hence, check if
			 * first character is a numerical digit or not.
			 */
			if (temp[0] >= '0' && temp[0] <= '9') {
				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
					ep->d_name);
				ret = read_from_imc_dir(imc_dir, count);
				if (ret) {
					closedir(dp);

					return ret;
				}
				count++;
			}
		}
		closedir(dp);
		if (count == 0) {
			ksft_print_msg("Unable to find iMC counters\n");

			return -1;
		}
	} else {
		ksft_perror("Unable to open PMU directory");

		return -1;
	}

	return count;
}
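
/*
 * Illustrative sysfs layout the enumeration above walks (the base path is
 * the real DYN_PMU_PATH; the instance names are assumed examples):
 *
 *	/sys/bus/event_source/devices/uncore_imc_0/type
 *	/sys/bus/event_source/devices/uncore_imc_0/events/cas_count_read
 *	/sys/bus/event_source/devices/uncore_imc_0/events/cas_count_write
 *	/sys/bus/event_source/devices/uncore_imc_1/...
 *	/sys/bus/event_source/devices/uncore_imc_free_running/  (skipped, no
 *	digit right after the prefix)
 */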

int initialize_mem_bw_imc(void)
{
	int imc, j;

	imcs = num_of_imcs();
	if (imcs <= 0)
		return imcs;

	/* Initialize perf_event_attr structures for all iMC's */
	for (imc = 0; imc < imcs; imc++) {
		for (j = 0; j < 2; j++)
			membw_initialize_perf_event_attr(imc, j);
	}

	return 0;
}

static void perf_close_imc_mem_bw(void)
{
	int mc;

	for (mc = 0; mc < imcs; mc++) {
		if (imc_counters_config[mc][READ].fd != -1)
			close(imc_counters_config[mc][READ].fd);
		if (imc_counters_config[mc][WRITE].fd != -1)
			close(imc_counters_config[mc][WRITE].fd);
	}
}

/*
 * perf_open_imc_mem_bw - Open perf fds for IMCs
 * @cpu_no: CPU number that the benchmark PID is bound to
 *
 * Return: = 0 on success. < 0 on failure.
 */
static int perf_open_imc_mem_bw(int cpu_no)
{
	int imc, ret;

	for (imc = 0; imc < imcs; imc++) {
		imc_counters_config[imc][READ].fd = -1;
		imc_counters_config[imc][WRITE].fd = -1;
	}

	for (imc = 0; imc < imcs; imc++) {
		ret = open_perf_event(imc, cpu_no, READ);
		if (ret)
			goto close_fds;
		ret = open_perf_event(imc, cpu_no, WRITE);
		if (ret)
			goto close_fds;
	}

	return 0;

close_fds:
	perf_close_imc_mem_bw();
	return -1;
}

/*
 * do_imc_mem_bw_test - Perform memory bandwidth test
 *
 * Runs the memory bandwidth test over a one second period. Also handles
 * starting and stopping of the IMC perf counters around the test.
 */
static void do_imc_mem_bw_test(void)
{
	int imc;

	for (imc = 0; imc < imcs; imc++) {
		membw_ioctl_perf_event_ioc_reset_enable(imc, READ);
		membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE);
	}

	sleep(1);

	/* Stop counters after a second to get results (both read and write) */
	for (imc = 0; imc < imcs; imc++) {
		membw_ioctl_perf_event_ioc_disable(imc, READ);
		membw_ioctl_perf_event_ioc_disable(imc, WRITE);
	}
}

/*
 * get_mem_bw_imc - Memory bandwidth as reported by iMC counters
 * @bw_report: Bandwidth report type (reads, writes)
 *
 * Memory bandwidth utilized by a process on a socket can be calculated
 * using iMC counters. Perf events are used to read these counters.
 *
 * Return: = 0 on success. < 0 on failure.
 */
static int get_mem_bw_imc(const char *bw_report, float *bw_imc)
{
	float reads, writes, of_mul_read, of_mul_write;
	int imc;

	/* Sum results from all iMC counters (both read and write) */
	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;

	/*
	 * Get results which are stored in struct type imc_counter_config
	 * Take overflow into consideration before calculating total bandwidth.
	 */
	for (imc = 0; imc < imcs; imc++) {
		struct imc_counter_config *r =
			&imc_counters_config[imc][READ];
		struct imc_counter_config *w =
			&imc_counters_config[imc][WRITE];

		if (read(r->fd, &r->return_value,
			 sizeof(struct membw_read_format)) == -1) {
			ksft_perror("Couldn't get read bandwidth through iMC");
			return -1;
		}

		if (read(w->fd, &w->return_value,
			 sizeof(struct membw_read_format)) == -1) {
			ksft_perror("Couldn't get write bandwidth through iMC");
			return -1;
		}

		__u64 r_time_enabled = r->return_value.time_enabled;
		__u64 r_time_running = r->return_value.time_running;

		if (r_time_enabled != r_time_running)
			of_mul_read = (float)r_time_enabled /
					(float)r_time_running;

		__u64 w_time_enabled = w->return_value.time_enabled;
		__u64 w_time_running = w->return_value.time_running;

		if (w_time_enabled != w_time_running)
			of_mul_write = (float)w_time_enabled /
					(float)w_time_running;
		reads += r->return_value.value * of_mul_read * SCALE;
		writes += w->return_value.value * of_mul_write * SCALE;
	}

	if (strcmp(bw_report, "reads") == 0) {
		*bw_imc = reads;
		return 0;
	}

	if (strcmp(bw_report, "writes") == 0) {
		*bw_imc = writes;
		return 0;
	}

	*bw_imc = reads + writes;
	return 0;
}
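
/*
 * Worked example of the time_enabled/time_running correction above (the
 * numbers are made up): if a counter was enabled for 1000000 ns but only
 * scheduled on the PMU for 500000 ns, of_mul becomes 2.0 and the raw count
 * is scaled up to estimate the value over the full interval; when the two
 * times are equal the multiplier stays 1.
 */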

/*
 * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
 * @param:	Parameters passed to resctrl_val()
 * @domain_id:	Domain ID (cache ID; for MB, L3 cache ID)
 */
void initialize_mem_bw_resctrl(const struct resctrl_val_param *param,
			       int domain_id)
{
	sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
		param->ctrlgrp, domain_id);
}
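
/*
 * For example (hypothetical group name), with ctrlgrp "c1" and L3 domain 0
 * the formatted path becomes:
 *
 *	/sys/fs/resctrl/c1/mon_data/mon_L3_00/mbm_local_bytes
 *
 * assuming RESCTRL_PATH is the usual "/sys/fs/resctrl" mount point.
 */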

/*
 * Open file to read MBM local bytes from resctrl FS
 */
static FILE *open_mem_bw_resctrl(const char *mbm_bw_file)
{
	FILE *fp;

	fp = fopen(mbm_bw_file, "r");
	if (!fp)
		ksft_perror("Failed to open total memory bandwidth file");

	return fp;
}

/*
 * Get MBM Local bytes as reported by resctrl FS
 */
static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total)
{
	if (fscanf(fp, "%lu\n", mbm_total) <= 0) {
		ksft_perror("Could not get MBM local bytes");
		return -1;
	}
	return 0;
}

static pid_t bm_pid, ppid;

void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
	/* Only kill the child if bm_pid was set after fork() */
	if (bm_pid)
		kill(bm_pid, SIGKILL);
	umount_resctrlfs();
	if (current_test && current_test->cleanup)
		current_test->cleanup();
	ksft_print_msg("Ending\n\n");

	exit(EXIT_SUCCESS);
}

/*
 * Register CTRL-C handler for parent, as it has to kill
 * child process before exiting.
 */
int signal_handler_register(const struct resctrl_test *test)
{
	struct sigaction sigact = {};
	int ret = 0;

	bm_pid = 0;

	current_test = test;
	sigact.sa_sigaction = ctrlc_handler;
	sigemptyset(&sigact.sa_mask);
	sigact.sa_flags = SA_SIGINFO;
	if (sigaction(SIGINT, &sigact, NULL) ||
	    sigaction(SIGTERM, &sigact, NULL) ||
	    sigaction(SIGHUP, &sigact, NULL)) {
		ksft_perror("sigaction");
		ret = -1;
	}
	return ret;
}

/*
 * Reset signal handler to SIG_DFL.
 * Returns void because the caller should keep the error code of the
 * other path even if sigaction fails.
 */
void signal_handler_unregister(void)
{
	struct sigaction sigact = {};

	current_test = NULL;
	sigact.sa_handler = SIG_DFL;
	sigemptyset(&sigact.sa_mask);
	if (sigaction(SIGINT, &sigact, NULL) ||
	    sigaction(SIGTERM, &sigact, NULL) ||
	    sigaction(SIGHUP, &sigact, NULL)) {
		ksft_perror("sigaction");
	}
}

parent_exit(pid_t ppid)508 static void parent_exit(pid_t ppid)
509 {
510 	kill(ppid, SIGKILL);
511 	umount_resctrlfs();
512 	exit(EXIT_FAILURE);
513 }
514 
515 /*
516  * print_results_bw:	the memory bandwidth results are stored in a file
517  * @filename:		file that stores the results
518  * @bm_pid:		child pid that runs benchmark
519  * @bw_imc:		perf imc counter value
520  * @bw_resc:		memory bandwidth value
521  *
522  * Return:		0 on success, < 0 on error.
523  */
print_results_bw(char * filename,pid_t bm_pid,float bw_imc,unsigned long bw_resc)524 static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc,
525 			    unsigned long bw_resc)
526 {
527 	unsigned long diff = fabs(bw_imc - bw_resc);
528 	FILE *fp;
529 
530 	if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
531 		printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc);
532 		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);
533 	} else {
534 		fp = fopen(filename, "a");
535 		if (!fp) {
536 			ksft_perror("Cannot open results file");
537 
538 			return -1;
539 		}
540 		if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
541 			    (int)bm_pid, bw_imc, bw_resc, diff) <= 0) {
542 			ksft_print_msg("Could not log results\n");
543 			fclose(fp);
544 
545 			return -1;
546 		}
547 		fclose(fp);
548 	}
549 
550 	return 0;
551 }
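
/*
 * Illustrative output line (the values are made up): with filename "stdio"
 * the result is printed to the console roughly as
 *
 *	Pid: 1234 	 Mem_BW_iMC: 6100.500000 	 Mem_BW_resc: 6050 	 Difference: 50
 *
 * where both bandwidth figures are in MB over the one second measurement
 * window.
 */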

/*
 * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs
 * @uparams:		User supplied parameters
 * @param:		Parameters passed to resctrl_val()
 * @bm_pid:		PID that runs the benchmark
 * @bw_report:		Bandwidth report type (reads, writes)
 *
 * Measure memory bandwidth from resctrl and from another source, which is
 * the perf iMC value here but could be something else if the perf iMC
 * events are not available. Compare the two values to validate the resctrl
 * value. The measurement takes one second.
 */
int measure_mem_bw(const struct user_params *uparams,
		   struct resctrl_val_param *param, pid_t bm_pid,
		   const char *bw_report)
{
	unsigned long bw_resc, bw_resc_start, bw_resc_end;
	FILE *mem_bw_fp;
	float bw_imc;
	int ret;

	bw_report = get_bw_report_type(bw_report);
	if (!bw_report)
		return -1;

	mem_bw_fp = open_mem_bw_resctrl(mbm_total_path);
	if (!mem_bw_fp)
		return -1;

	ret = perf_open_imc_mem_bw(uparams->cpu);
	if (ret < 0)
		goto close_fp;

	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start);
	if (ret < 0)
		goto close_imc;

	rewind(mem_bw_fp);

	do_imc_mem_bw_test();

	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end);
	if (ret < 0)
		goto close_imc;

	ret = get_mem_bw_imc(bw_report, &bw_imc);
	if (ret < 0)
		goto close_imc;

	perf_close_imc_mem_bw();
	fclose(mem_bw_fp);

	bw_resc = (bw_resc_end - bw_resc_start) / MB;

	return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);

close_imc:
	perf_close_imc_mem_bw();
close_fp:
	fclose(mem_bw_fp);
	return ret;
}

/*
 * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
 *		   when signalled. Direct benchmark stdio to /dev/null.
 * @signum:	signal number
 * @info:	signal info
 * @ucontext:	user context in signal handling
 */
static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
{
	int operation, ret, memflush;
	char **benchmark_cmd;
	size_t span;
	bool once;
	FILE *fp;

	benchmark_cmd = info->si_ptr;

	/*
	 * Direct stdio of child to /dev/null, so that only parent writes to
	 * stdio (console)
	 */
	fp = freopen("/dev/null", "w", stdout);
	if (!fp) {
		ksft_perror("Unable to direct benchmark status to /dev/null");
		parent_exit(ppid);
	}

	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
		/* Execute default fill_buf benchmark */
		span = strtoul(benchmark_cmd[1], NULL, 10);
		memflush = atoi(benchmark_cmd[2]);
		operation = atoi(benchmark_cmd[3]);
		if (!strcmp(benchmark_cmd[4], "true")) {
			once = true;
		} else if (!strcmp(benchmark_cmd[4], "false")) {
			once = false;
		} else {
			ksft_print_msg("Invalid once parameter\n");
			parent_exit(ppid);
		}

		if (run_fill_buf(span, memflush, operation, once))
			fprintf(stderr, "Error in running fill buffer\n");
	} else {
		/* Execute specified benchmark */
		ret = execvp(benchmark_cmd[0], benchmark_cmd);
		if (ret)
			ksft_perror("execvp");
	}

	fclose(stdout);
	ksft_print_msg("Unable to run specified benchmark\n");
	parent_exit(ppid);
}

/*
 * resctrl_val:	execute the benchmark and measure memory bandwidth while
 *			the benchmark runs
 * @test:		test information structure
 * @uparams:		user supplied parameters
 * @benchmark_cmd:	benchmark command and its arguments
 * @param:		parameters passed to resctrl_val()
 *
 * Return:		0 when the test was run, < 0 on error.
 */
int resctrl_val(const struct resctrl_test *test,
		const struct user_params *uparams,
		const char * const *benchmark_cmd,
		struct resctrl_val_param *param)
{
	struct sigaction sigact;
	int ret = 0, pipefd[2];
	char pipe_message = 0;
	union sigval value;
	int domain_id;

	if (strcmp(param->filename, "") == 0)
		sprintf(param->filename, "stdio");

	ret = get_domain_id(test->resource, uparams->cpu, &domain_id);
	if (ret < 0) {
		ksft_print_msg("Could not get domain ID\n");
		return ret;
	}

	/*
	 * If the benchmark wasn't successfully started by the child, the
	 * child should kill the parent, so save the parent's pid
	 */
	ppid = getpid();

	if (pipe(pipefd)) {
		ksft_perror("Unable to create pipe");

		return -1;
	}

	/*
	 * Fork to start benchmark, save child's pid so that it can be killed
	 * when needed
	 */
	fflush(stdout);
	bm_pid = fork();
	if (bm_pid == -1) {
		ksft_perror("Unable to fork");

		return -1;
	}

	if (bm_pid == 0) {
		/*
		 * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
		 * start benchmark
		 */
		sigfillset(&sigact.sa_mask);
		sigdelset(&sigact.sa_mask, SIGUSR1);

		sigact.sa_sigaction = run_benchmark;
		sigact.sa_flags = SA_SIGINFO;

		/* Register for "SIGUSR1" signal from parent */
		if (sigaction(SIGUSR1, &sigact, NULL)) {
			ksft_perror("Can't register child for signal");
			parent_exit(ppid);
		}

		/* Tell parent that child is ready */
		close(pipefd[0]);
		pipe_message = 1;
		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
		    sizeof(pipe_message)) {
			ksft_perror("Failed signaling parent process");
			close(pipefd[1]);
			return -1;
		}
		close(pipefd[1]);

		/* Suspend child until delivery of "SIGUSR1" from parent */
		sigsuspend(&sigact.sa_mask);

		ksft_perror("Child is done");
		parent_exit(ppid);
	}

	ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid);

	/*
	 * The cast removes constness but nothing mutates benchmark_cmd within
	 * the context of this process. At the receiving process, it becomes
	 * argv, which is mutable, on exec() but that's after fork() so it
	 * doesn't matter for the process running the tests.
	 */
	value.sival_ptr = (void *)benchmark_cmd;

	/* Taskset benchmark to specified cpu */
	ret = taskset_benchmark(bm_pid, uparams->cpu, NULL);
	if (ret)
		goto out;

	/* Write benchmark to specified control&monitoring grp in resctrl FS */
	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
	if (ret)
		goto out;

	if (param->init) {
		ret = param->init(param, domain_id);
		if (ret)
			goto out;
	}

	/* Parent waits for child to be ready. */
	close(pipefd[1]);
	while (pipe_message != 1) {
		if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
		    sizeof(pipe_message)) {
			ksft_perror("Failed reading message from child process");
			close(pipefd[0]);
			goto out;
		}
	}
	close(pipefd[0]);

	/* Signal child to start benchmark */
	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
		ksft_perror("sigqueue SIGUSR1 to child");
		ret = -1;
		goto out;
	}

	/* Give benchmark enough time to fully run */
	sleep(1);

	/* Test runs until the callback setup() tells the test to stop. */
	while (1) {
		ret = param->setup(test, uparams, param);
		if (ret == END_OF_TESTS) {
			ret = 0;
			break;
		}
		if (ret < 0)
			break;

		ret = param->measure(uparams, param, bm_pid);
		if (ret)
			break;
	}

out:
	kill(bm_pid, SIGKILL);

	return ret;
}