• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
4  * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
5  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6  */
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/types.h>
10 #include <dirent.h>
11 #include <limits.h>
12 #include <unistd.h>
13 #include <errno.h>
14 
15 #include "trace-local.h"
16 #include "trace-msg.h"
17 
/* Table of every guest registered so far; add_guest() grows it one entry at a time */
static struct trace_guest *guests;
/* Number of valid entries in the guests table */
static size_t guests_len;
20 
get_guest_by_cid(unsigned int guest_cid)21 static struct trace_guest *get_guest_by_cid(unsigned int guest_cid)
22 {
23 	int i;
24 
25 	if (!guests)
26 		return NULL;
27 
28 	for (i = 0; i < guests_len; i++)
29 		if (guest_cid == guests[i].cid)
30 			return guests + i;
31 	return NULL;
32 }
33 
get_guest_by_name(const char * name)34 static struct trace_guest *get_guest_by_name(const char *name)
35 {
36 	int i;
37 
38 	if (!guests || !strlen(name))
39 		return NULL;
40 
41 	for (i = 0; i < guests_len; i++)
42 		if (strcmp(name, guests[i].name) == 0)
43 			return guests + i;
44 	return NULL;
45 }
46 
trace_have_guests_pid(void)47 bool trace_have_guests_pid(void)
48 {
49 	for (int i = 0; i < guests_len; i++) {
50 		if (guests[i].pid < 0)
51 			return false;
52 	}
53 
54 	return true;
55 }
56 
57 /* Find all the tasks associated with the guest pid */
find_tasks(struct trace_guest * guest)58 static void find_tasks(struct trace_guest *guest)
59 {
60 	struct dirent *dent;
61 	char *path;
62 	DIR *dir;
63 	int ret;
64 	int tasks = 0;
65 
66 	ret = asprintf(&path, "/proc/%d/task", guest->pid);
67 	if (ret < 0)
68 		return;
69 
70 	dir = opendir(path);
71 	free(path);
72 	if (!dir)
73 		return;
74 
75 	while ((dent = readdir(dir))) {
76 		int *pids;
77 		if (!(dent->d_type == DT_DIR && is_digits(dent->d_name)))
78 			continue;
79 		pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2));
80 		if (!pids)
81 			break;
82 		pids[tasks++] = strtol(dent->d_name, NULL, 0);
83 		pids[tasks] = -1;
84 		guest->task_pids = pids;
85 	}
86 	closedir(dir);
87 }
88 
89 static void find_pid_by_cid(struct trace_guest *guest);
90 
add_guest(unsigned int cid,const char * name)91 static struct trace_guest *add_guest(unsigned int cid, const char *name)
92 {
93 	struct trace_guest *guest;
94 
95 	guests = realloc(guests, (guests_len + 1) * sizeof(*guests));
96 	if (!guests)
97 		die("allocating new guest");
98 
99 	guest = &guests[guests_len++];
100 
101 	memset(guest, 0, sizeof(*guest));
102 	guest->name = strdup(name);
103 	if (!guest->name)
104 		die("allocating guest name");
105 	guest->cid = cid;
106 	guest->pid = -1;
107 
108 	find_pid_by_cid(guest);
109 	find_tasks(guest);
110 
111 	return guest;
112 }
113 
/*
 * Create the "vsock_find_pid" tracing instance, enable the
 * sched:sched_waking and kvm:kvm_exit events in it and turn tracing on.
 *
 * Returns the instance, or NULL if it could not be created. Failures to
 * enable the events or to turn tracing on are not reported.
 */
static struct tracefs_instance *start_trace_connect(void)
{
	struct tracefs_instance *instance;

	instance = tracefs_instance_create("vsock_find_pid");
	if (instance) {
		tracefs_event_enable(instance, "sched", "sched_waking");
		tracefs_event_enable(instance, "kvm", "kvm_exit");
		tracefs_trace_on(instance);
	}

	return instance;
}
127 
/* Node of a singly linked list of PIDs */
struct pids {
	struct pids		*next;	/* Next node, NULL at the end of the list */
	int			pid;	/* PID stored in this node */
};
132 
/* State shared between stop_trace_connect() and its per-event callback() */
struct trace_fields {
	struct tep_event		*sched_waking;	/* The sched/sched_waking event */
	struct tep_event		*kvm_exit;	/* The kvm/kvm_exit event */
	struct tep_format_field		*common_pid;	/* "common_pid" field, PID that logged the event */
	struct tep_format_field		*sched_next;	/* "pid" field of sched_waking, the task being woken */
	struct pids			*pids;		/* PIDs whose events are followed */
	int				found_pid;	/* PID seen in kvm_exit, -1 while unknown */
};
141 
free_pids(struct pids * pids)142 static void free_pids(struct pids *pids)
143 {
144 	struct pids *next;
145 
146 	while (pids) {
147 		next = pids;
148 		pids = pids->next;
149 		free(next);
150 	}
151 }
152 
add_pid(struct pids ** pids,int pid)153 static void add_pid(struct pids **pids, int pid)
154 {
155 	struct pids *new_pid;
156 
157 	new_pid = malloc(sizeof(*new_pid));
158 	if (!new_pid)
159 		return;
160 
161 	new_pid->pid = pid;
162 	new_pid->next = *pids;
163 	*pids = new_pid;
164 }
165 
match_pid(struct pids * pids,int pid)166 static bool match_pid(struct pids *pids, int pid)
167 {
168 	while (pids) {
169 		if (pids->pid == pid)
170 			return true;
171 		pids = pids->next;
172 	}
173 	return false;
174 }
175 
callback(struct tep_event * event,struct tep_record * record,int cpu,void * data)176 static int callback(struct tep_event *event, struct tep_record *record, int cpu,
177 		    void *data)
178 {
179 	struct trace_fields *fields = data;
180 	struct tep_handle *tep = event->tep;
181 	unsigned long long val;
182 	int flags;
183 	int type;
184 	int pid;
185 	int ret;
186 
187 	ret = tep_read_number_field(fields->common_pid, record->data, &val);
188 	if (ret < 0)
189 		return 0;
190 
191 	flags = tep_data_flags(tep, record);
192 
193 	/* Ignore events in interrupts */
194 	if (flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
195 		return 0;
196 
197 	/*
198 	 * First make sure that this event comes from a PID from
199 	 * this task (or a task woken by this task)
200 	 */
201 	pid = val;
202 	if (!match_pid(fields->pids, pid))
203 		return 0;
204 
205 	type = tep_data_type(tep, record);
206 
207 	/*
208 	 * If this event is a kvm_exit, we have our PID
209 	 * and we can stop processing.
210 	 */
211 	if (type == fields->kvm_exit->id) {
212 		fields->found_pid = pid;
213 		return -1;
214 	}
215 
216 	if (type != fields->sched_waking->id)
217 		return 0;
218 
219 	ret = tep_read_number_field(fields->sched_next, record->data, &val);
220 	if (ret < 0)
221 		return 0;
222 
223 	/* This is a task woken by our task or a chain of wake ups */
224 	add_pid(&fields->pids, (int)val);
225 	return 0;
226 }
227 
/*
 * Read the task group ID of @pid from the "Tgid:" line of
 * /proc/<pid>/status.
 *
 * Returns the Tgid, or -1 if the file cannot be opened, has no "Tgid:"
 * line, or the value on that line is not a decimal number that fits in
 * an int.
 */
static int find_tgid(int pid)
{
	char path[64];
	char line[256];
	int tgid = -1;
	FILE *fp;
	int len;

	len = snprintf(path, sizeof(path), "/proc/%d/status", pid);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	fp = fopen(path, "r");
	if (!fp)
		return -1;

	/*
	 * The lines of interest are short; an over-long line would merely
	 * be handed back by fgets() in several pieces.
	 */
	while (fgets(line, sizeof(line), fp)) {
		const char *num = line + 5;
		char *end;
		long val;

		if (strncmp(line, "Tgid:", 5) != 0)
			continue;

		/* strtol() skips the blanks that separate the key from the value */
		errno = 0;
		val = strtol(num, &end, 10);
		if (end != num && errno != ERANGE && val >= 0 && val <= INT_MAX)
			tgid = (int)val;
		break;
	}
	fclose(fp);

	return tgid;
}
266 
stop_trace_connect(struct tracefs_instance * open_instance)267 static int stop_trace_connect(struct tracefs_instance *open_instance)
268 {
269 	const char *systems[] = { "kvm", "sched", NULL};
270 	struct tep_handle *tep;
271 	struct trace_fields trace_fields;
272 	int tgid = -1;
273 
274 	if (!open_instance)
275 		return -1;
276 
277 	/* The connection is finished, stop tracing, we have what we want */
278 	tracefs_trace_off(open_instance);
279 	tracefs_event_disable(open_instance, NULL, NULL);
280 
281 	tep = tracefs_local_events_system(NULL, systems);
282 
283 	trace_fields.found_pid = -1;
284 	trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
285 	if (!trace_fields.sched_waking)
286 		goto out;
287 	trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
288 	if (!trace_fields.kvm_exit)
289 		goto out;
290 	trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking,
291 							"common_pid");
292 	if (!trace_fields.common_pid)
293 		goto out;
294 	trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking,
295 							"pid");
296 	if (!trace_fields.sched_next)
297 		goto out;
298 
299 	trace_fields.pids = NULL;
300 	add_pid(&trace_fields.pids, getpid());
301 	tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
302 	free_pids(trace_fields.pids);
303  out:
304 	tracefs_instance_destroy(open_instance);
305 	tracefs_instance_free(open_instance);
306 
307 	if (trace_fields.found_pid > 0)
308 		tgid = find_tgid(trace_fields.found_pid);
309 
310 	return tgid;
311 }
312 
313 /*
314  * In order to find the guest that is associated to the given cid,
315  * trace the sched_waking and kvm_exit events, connect to the cid
316  * (doesn't matter what port, use -1 to not connect to anything)
317  * and find what task gets woken up from this code and calls kvm_exit,
318  * then that is the task that is running the guest.
319  * Then look at the /proc/<guest-pid>/status file to find the task group
320  * id (Tgid), and this is the PID of the task running all the threads.
321  */
find_pid_by_cid(struct trace_guest * guest)322 static void find_pid_by_cid(struct trace_guest *guest)
323 {
324 	struct tracefs_instance *instance;
325 	int fd;
326 
327 	instance = start_trace_connect();
328 	fd = trace_vsock_open(guest->cid, -1);
329 	guest->pid = stop_trace_connect(instance);
330 	/* Just in case! */
331 	if (fd >= 0)
332 		close(fd);
333 }
334 
/*
 * Find the guest matching @name or @cid, registering it if needed.
 *
 * A non-NULL @name is tried first. If that finds nothing and @cid is not
 * zero, the lookup is repeated by CID; when this fails too and a @name
 * was given, a new guest is added for the @cid / @name pair.
 *
 * Returns the guest, or NULL when nothing matched and nothing was added.
 */
struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
{
	struct trace_guest *found;

	if (name) {
		found = get_guest_by_name(name);
		if (found)
			return found;
	}

	if (cid == 0)
		return NULL;

	found = get_guest_by_cid(cid);
	if (found || !name)
		return found;

	return add_guest(cid, name);
}
352 
#define VM_CID_CMD	"virsh dumpxml"
#define VM_CID_LINE	"<cid auto="
#define VM_CID_ID	"address='"

/*
 * Extract the CID from one line of "virsh dumpxml" output.
 *
 * The line must contain both VM_CID_LINE and VM_CID_ID, and VM_CID_ID
 * must be directly followed by a decimal number that fits in an
 * unsigned int. Returns true and stores the number in @cid on success,
 * false (leaving @cid untouched) otherwise.
 */
static bool parse_guest_cid(const char *line, unsigned int *cid)
{
	const char *start;
	unsigned long val;
	char *end;

	if (!strstr(line, VM_CID_LINE))
		return false;

	start = strstr(line, VM_CID_ID);
	if (!start)
		return false;
	start += strlen(VM_CID_ID);

	/* strtoul() on its own would accept leading blanks or a sign */
	if (*start < '0' || *start > '9')
		return false;

	errno = 0;
	val = strtoul(start, &end, 10);
	if (errno == ERANGE || val > UINT_MAX)
		return false;

	*cid = val;
	return true;
}

/*
 * Run "virsh dumpxml <name>" and register the guest @name with the CID
 * found in its output. Nothing is registered if the command cannot be
 * started or no valid CID line shows up.
 *
 * NOTE(review): @name is placed unquoted on a command line that popen()
 * hands to the shell - confirm that guest names cannot contain shell
 * metacharacters.
 */
static void read_guest_cid(char *name)
{
	unsigned int cid;
	char line[512];
	char *cmd;
	FILE *f;

	/* On failure the content of cmd is undefined, do not use it */
	if (asprintf(&cmd, "%s %s", VM_CID_CMD, name) < 0)
		return;

	f = popen(cmd, "r");
	free(cmd);
	if (f == NULL)
		return;

	/* Only the first valid CID line is used */
	while (fgets(line, sizeof(line), f) != NULL) {
		if (!parse_guest_cid(line, &cid))
			continue;
		add_guest(cid, name);
		break;
	}

	/* close */
	pclose(f);
}
386 
#define VM_NAME_CMD	"virsh list --name"

/*
 * Run "virsh list --name" and hand every non-empty line of its output,
 * without the trailing newline, to read_guest_cid().
 *
 * Nothing happens if the command cannot be started.
 */
void read_qemu_guests(void)
{
	char name[256];
	FILE *f;

	f = popen(VM_NAME_CMD, "r");
	if (f == NULL)
		return;

	while (fgets(name, sizeof(name), f) != NULL) {
		/*
		 * Cut at the newline, if there is one. Unlike indexing with
		 * strlen() - 1, this is safe for a line of length zero.
		 */
		name[strcspn(name, "\n")] = '\0';
		if (name[0] == '\0')
			continue;
		read_guest_cid(name);
	}

	/* close */
	pclose(f);
}
408 
get_guest_vcpu_pid(unsigned int guest_cid,unsigned int guest_vcpu)409 int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu)
410 {
411 	int i;
412 
413 	if (!guests)
414 		return -1;
415 
416 	for (i = 0; i < guests_len; i++) {
417 		if (guests[i].cpu_pid < 0 || guest_vcpu >= guests[i].cpu_max)
418 			continue;
419 		if (guest_cid == guests[i].cid)
420 			return guests[i].cpu_pid[guest_vcpu];
421 	}
422 	return -1;
423 }
424 
425 /**
426  * trace_add_guest_info - Add the guest info into the trace file option
427  * @handle: The file handle that the guest info option is added to
428  * @instance: The instance that that represents the guest
429  *
430  * Adds information about the guest from the @instance into an option
431  * for the @instance. It records the trace_id, the number of CPUs,
432  * as well as the PIDs of the host that represent the CPUs.
433  */
434 void
trace_add_guest_info(struct tracecmd_output * handle,struct buffer_instance * instance)435 trace_add_guest_info(struct tracecmd_output *handle, struct buffer_instance *instance)
436 {
437 	unsigned long long trace_id;
438 	struct trace_guest *guest;
439 	const char *name;
440 	char *buf, *p;
441 	int cpus;
442 	int size;
443 	int pid;
444 	int i;
445 
446 	if (is_network(instance)) {
447 		name = instance->name;
448 		cpus = instance->cpu_count;
449 		trace_id = instance->trace_id;
450 	} else {
451 		guest = trace_get_guest(instance->cid, NULL);
452 		if (!guest)
453 			return;
454 		cpus = guest->cpu_max;
455 		name = guest->name;
456 		/*
457 		 * If this is a proxy, the trace_id of the guest is
458 		 * in the guest descriptor (added in trace_tsync_as_host().
459 		 */
460 		if (guest->trace_id)
461 			trace_id = guest->trace_id;
462 		else
463 			trace_id = instance->trace_id;
464 	}
465 
466 	size = strlen(name) + 1;
467 	size += sizeof(long long);	/* trace_id */
468 	size += sizeof(int);		/* cpu count */
469 	size += cpus * 2 * sizeof(int);	/* cpu,pid pair */
470 
471 	buf = calloc(1, size);
472 	if (!buf)
473 		return;
474 	p = buf;
475 	strcpy(p, name);
476 	p += strlen(name) + 1;
477 
478 	memcpy(p, &trace_id, sizeof(long long));
479 	p += sizeof(long long);
480 
481 	memcpy(p, &cpus, sizeof(int));
482 	p += sizeof(int);
483 	for (i = 0; i < cpus; i++) {
484 		if (is_network(instance))
485 			pid = -1;
486 		else
487 			pid = guest->cpu_pid[i];
488 		memcpy(p, &i, sizeof(int));
489 		p += sizeof(int);
490 		memcpy(p, &pid, sizeof(int));
491 		p += sizeof(int);
492 	}
493 
494 	tracecmd_add_option(handle, TRACECMD_OPTION_GUEST, size, buf);
495 	free(buf);
496 }
497