• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
4  * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
5  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6  */
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/types.h>
10 #include <dirent.h>
11 #include <limits.h>
12 #include <unistd.h>
13 #include <errno.h>
14 
15 #include "trace-local.h"
16 #include "trace-msg.h"
17 
/*
 * Table of known guests. It is grown one entry at a time by add_guest(),
 * and guests_len is the number of valid entries in it.
 */
static struct trace_guest *guests;
static size_t guests_len;
20 
get_guest_by_cid(unsigned int guest_cid)21 static struct trace_guest *get_guest_by_cid(unsigned int guest_cid)
22 {
23 	int i;
24 
25 	if (!guests)
26 		return NULL;
27 
28 	for (i = 0; i < guests_len; i++)
29 		if (guest_cid == guests[i].cid)
30 			return guests + i;
31 	return NULL;
32 }
33 
get_guest_by_name(const char * name)34 static struct trace_guest *get_guest_by_name(const char *name)
35 {
36 	int i;
37 
38 	if (!guests)
39 		return NULL;
40 
41 	for (i = 0; i < guests_len; i++)
42 		if (strcmp(name, guests[i].name) == 0)
43 			return guests + i;
44 	return NULL;
45 }
46 
trace_have_guests_pid(void)47 bool trace_have_guests_pid(void)
48 {
49 	for (int i = 0; i < guests_len; i++) {
50 		if (guests[i].pid < 0)
51 			return false;
52 	}
53 
54 	return true;
55 }
56 
add_guest(unsigned int cid,const char * name)57 static struct trace_guest *add_guest(unsigned int cid, const char *name)
58 {
59 	guests = realloc(guests, (guests_len + 1) * sizeof(*guests));
60 	if (!guests)
61 		die("allocating new guest");
62 	memset(&guests[guests_len], 0, sizeof(struct trace_guest));
63 	guests[guests_len].name = strdup(name);
64 	if (!guests[guests_len].name)
65 		die("allocating guest name");
66 	guests[guests_len].cid = cid;
67 	guests[guests_len].pid = -1;
68 	guests_len++;
69 
70 	return &guests[guests_len - 1];
71 }
72 
/*
 * Create the "vsock_find_pid" tracing instance, enable the
 * sched:sched_waking and kvm:kvm_exit events in it and turn tracing on.
 *
 * Returns the instance, or NULL if it could not be created. The results
 * of enabling the events and of turning tracing on are not checked.
 */
static struct tracefs_instance *start_trace_connect(void)
{
	struct tracefs_instance *instance;

	instance = tracefs_instance_create("vsock_find_pid");
	if (instance) {
		tracefs_event_enable(instance, "sched", "sched_waking");
		tracefs_event_enable(instance, "kvm", "kvm_exit");
		tracefs_trace_on(instance);
	}

	return instance;
}
86 
/* Node of a singly linked list of PIDs; new nodes are pushed at the head. */
struct pids {
	struct pids		*next;	/* next node, NULL at the end of the list */
	int			pid;
};
91 
/* State handed to callback() while the recorded events are iterated. */
struct trace_fields {
	struct tep_event		*sched_waking;	/* sched:sched_waking event */
	struct tep_event		*kvm_exit;	/* kvm:kvm_exit event */
	struct tep_format_field		*common_pid;	/* PID of the task that recorded the event */
	struct tep_format_field		*sched_next;	/* "pid" field of sched_waking (the woken task) */
	struct pids			*pids;		/* PIDs whose events are followed */
	int				found_pid;	/* PID seen in a kvm_exit, -1 until then */
};
100 
free_pids(struct pids * pids)101 static void free_pids(struct pids *pids)
102 {
103 	struct pids *next;
104 
105 	while (pids) {
106 		next = pids;
107 		pids = pids->next;
108 		free(next);
109 	}
110 }
111 
add_pid(struct pids ** pids,int pid)112 static void add_pid(struct pids **pids, int pid)
113 {
114 	struct pids *new_pid;
115 
116 	new_pid = malloc(sizeof(*new_pid));
117 	if (!new_pid)
118 		return;
119 
120 	new_pid->pid = pid;
121 	new_pid->next = *pids;
122 	*pids = new_pid;
123 }
124 
match_pid(struct pids * pids,int pid)125 static bool match_pid(struct pids *pids, int pid)
126 {
127 	while (pids) {
128 		if (pids->pid == pid)
129 			return true;
130 		pids = pids->next;
131 	}
132 	return false;
133 }
134 
callback(struct tep_event * event,struct tep_record * record,int cpu,void * data)135 static int callback(struct tep_event *event, struct tep_record *record, int cpu,
136 		    void *data)
137 {
138 	struct trace_fields *fields = data;
139 	struct tep_handle *tep = event->tep;
140 	unsigned long long val;
141 	int flags;
142 	int type;
143 	int pid;
144 	int ret;
145 
146 	ret = tep_read_number_field(fields->common_pid, record->data, &val);
147 	if (ret < 0)
148 		return 0;
149 
150 	flags = tep_data_flags(tep, record);
151 
152 	/* Ignore events in interrupts */
153 	if (flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
154 		return 0;
155 
156 	/*
157 	 * First make sure that this event comes from a PID from
158 	 * this task (or a task woken by this task)
159 	 */
160 	pid = val;
161 	if (!match_pid(fields->pids, pid))
162 		return 0;
163 
164 	type = tep_data_type(tep, record);
165 
166 	/*
167 	 * If this event is a kvm_exit, we have our PID
168 	 * and we can stop processing.
169 	 */
170 	if (type == fields->kvm_exit->id) {
171 		fields->found_pid = pid;
172 		return -1;
173 	}
174 
175 	if (type != fields->sched_waking->id)
176 		return 0;
177 
178 	ret = tep_read_number_field(fields->sched_next, record->data, &val);
179 	if (ret < 0)
180 		return 0;
181 
182 	/* This is a task woken by our task or a chain of wake ups */
183 	add_pid(&fields->pids, (int)val);
184 	return 0;
185 }
186 
/*
 * Find the task group id of @pid by reading the "Tgid:" line of
 * /proc/<pid>/status.
 *
 * Returns the Tgid, or -1 if the file cannot be opened, has no "Tgid:"
 * line, or the value on that line is not a positive decimal number that
 * fits in an int.
 */
static int find_tgid(int pid)
{
	char path[64];
	char line[256];
	int at_line_start = 1;
	int tgid = -1;
	FILE *fp;
	int len;

	len = snprintf(path, sizeof(path), "/proc/%d/status", pid);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	fp = fopen(path, "r");
	if (!fp)
		return -1;

	while (fgets(line, sizeof(line), fp)) {
		int starts_line = at_line_start;
		char *end;
		long val;

		/*
		 * A chunk without a newline is the start (or middle) of a line
		 * longer than the buffer: the next chunk continues it and must
		 * not be mistaken for the beginning of a new line.
		 */
		at_line_start = strchr(line, '\n') != NULL;

		if (!starts_line || strncmp(line, "Tgid:", 5) != 0)
			continue;

		/* strtol() skips the white space that follows the colon */
		errno = 0;
		val = strtol(line + 5, &end, 10);
		if (end != line + 5 && errno != ERANGE && val > 0 && val <= INT_MAX)
			tgid = val;
		break;
	}
	fclose(fp);

	return tgid;
}
225 
stop_trace_connect(struct tracefs_instance * open_instance)226 static int stop_trace_connect(struct tracefs_instance *open_instance)
227 {
228 	const char *systems[] = { "kvm", "sched", NULL};
229 	struct tep_handle *tep;
230 	struct trace_fields trace_fields;
231 	int tgid = -1;
232 
233 	if (!open_instance)
234 		return -1;
235 
236 	/* The connection is finished, stop tracing, we have what we want */
237 	tracefs_trace_off(open_instance);
238 	tracefs_event_disable(open_instance, NULL, NULL);
239 
240 	tep = tracefs_local_events_system(NULL, systems);
241 
242 	trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
243 	if (!trace_fields.sched_waking)
244 		goto out;
245 	trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
246 	if (!trace_fields.kvm_exit)
247 		goto out;
248 	trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking,
249 							"common_pid");
250 	if (!trace_fields.common_pid)
251 		goto out;
252 	trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking,
253 							"pid");
254 	if (!trace_fields.sched_next)
255 		goto out;
256 
257 	trace_fields.found_pid = -1;
258 	trace_fields.pids = NULL;
259 	add_pid(&trace_fields.pids, getpid());
260 	tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
261 	free_pids(trace_fields.pids);
262  out:
263 	tracefs_instance_destroy(open_instance);
264 	tracefs_instance_free(open_instance);
265 
266 	if (trace_fields.found_pid > 0)
267 		tgid = find_tgid(trace_fields.found_pid);
268 
269 	return tgid;
270 }
271 
272 /*
273  * In order to find the guest that is associated to the given cid,
274  * trace the sched_waking and kvm_exit events, connect to the cid
275  * (doesn't matter what port, use -1 to not connect to anything)
276  * and find what task gets woken up from this code and calls kvm_exit,
277  * then that is the task that is running the guest.
278  * Then look at the /proc/<guest-pid>/status file to find the task group
279  * id (Tgid), and this is the PID of the task running all the threads.
280  */
find_pid_by_cid(struct trace_guest * guest)281 static void find_pid_by_cid(struct trace_guest *guest)
282 {
283 	struct tracefs_instance *instance;
284 	int fd;
285 
286 	instance = start_trace_connect();
287 	fd = trace_vsock_open(guest->cid, -1);
288 	guest->pid = stop_trace_connect(instance);
289 	/* Just in case! */
290 	if (fd >= 0)
291 		close(fd);
292 }
293 
/*
 * Find the guest matching @name or @cid, registering it if needed.
 *
 * A guest already known under @name wins. Otherwise, when @cid is not
 * zero, the guest is looked up by @cid; if that fails too and a @name
 * was given, a new guest is added and its PID is searched for.
 *
 * Returns the guest, or NULL if none matched and none could be added
 * (no usable @cid, or no @name to register it under).
 */
struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
{
	struct trace_guest *guest;

	if (name) {
		guest = get_guest_by_name(name);
		if (guest)
			return guest;
	}

	if (cid == 0)
		return NULL;

	guest = get_guest_by_cid(cid);
	if (guest || !name)
		return guest;

	guest = add_guest(cid, name);
	if (guest)
		find_pid_by_cid(guest);

	return guest;
}
314 
#define VM_CID_CMD	"virsh dumpxml"
#define VM_CID_LINE	"<cid auto="
#define VM_CID_ID	"address='"

/*
 * Run "virsh dumpxml <name>" and scan its output for the line describing
 * the guest's cid (<cid auto=... address='N'...). The first such line
 * with a valid number registers the guest called @name with that cid and
 * triggers the search for its PID.
 *
 * Nothing is registered if the command cannot be run or no usable cid
 * line is found.
 */
static void read_guest_cid(char *name)
{
	struct trace_guest *guest;
	char *cmd = NULL;
	char line[512];
	FILE *f;

	/* NOTE(review): @name is placed unquoted on a shell command line */
	if (asprintf(&cmd, "%s %s", VM_CID_CMD, name) < 0)
		return;

	f = popen(cmd, "r");
	free(cmd);
	if (f == NULL)
		return;

	while (fgets(line, sizeof(line), f) != NULL) {
		const char *num;
		long long val;
		char *addr;
		char *end;

		if (!strstr(line, VM_CID_LINE))
			continue;
		addr = strstr(line, VM_CID_ID);
		if (!addr)
			continue;
		num = addr + strlen(VM_CID_ID);

		/* errno is only meaningful if it was cleared before the call */
		errno = 0;
		val = strtoll(num, &end, 10);
		if (end == num || errno == ERANGE || val < 0 || val > UINT_MAX)
			continue;

		guest = add_guest((unsigned int)val, name);
		if (guest)
			find_pid_by_cid(guest);
		break;
	}

	/* close */
	pclose(f);
}
351 
#define VM_NAME_CMD	"virsh list --name"

/*
 * Run "virsh list --name" and hand every non-empty line of its output,
 * stripped of its trailing newline, to read_guest_cid() as a guest name.
 *
 * Does nothing if the command cannot be started.
 */
void read_qemu_guests(void)
{
	char name[256];
	size_t len;
	FILE *virsh;

	virsh = popen(VM_NAME_CMD, "r");
	if (!virsh)
		return;

	while (fgets(name, sizeof(name), virsh)) {
		/* Skip the blank lines of the listing */
		if (name[0] == '\n')
			continue;

		len = strlen(name);
		if (name[len - 1] == '\n')
			name[len - 1] = '\0';

		read_guest_cid(name);
	}

	/* close */
	pclose(virsh);
}
373 
get_guest_vcpu_pid(unsigned int guest_cid,unsigned int guest_vcpu)374 int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu)
375 {
376 	int i;
377 
378 	if (!guests)
379 		return -1;
380 
381 	for (i = 0; i < guests_len; i++) {
382 		if (guests[i].cpu_pid < 0 || guest_vcpu >= guests[i].cpu_max)
383 			continue;
384 		if (guest_cid == guests[i].cid)
385 			return guests[i].cpu_pid[guest_vcpu];
386 	}
387 	return -1;
388 }
389