1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
4 * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
5 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 */
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/types.h>
10 #include <dirent.h>
11 #include <limits.h>
12 #include <unistd.h>
13 #include <errno.h>
14
15 #include "trace-local.h"
16 #include "trace-msg.h"
17
/* Table of every guest registered so far; NULL until the first one is added */
static struct trace_guest *guests;
/* Number of entries currently held in the guests table */
static size_t guests_len;
20
get_guest_by_cid(unsigned int guest_cid)21 static struct trace_guest *get_guest_by_cid(unsigned int guest_cid)
22 {
23 int i;
24
25 if (!guests)
26 return NULL;
27
28 for (i = 0; i < guests_len; i++)
29 if (guest_cid == guests[i].cid)
30 return guests + i;
31 return NULL;
32 }
33
get_guest_by_name(const char * name)34 static struct trace_guest *get_guest_by_name(const char *name)
35 {
36 int i;
37
38 if (!guests || !strlen(name))
39 return NULL;
40
41 for (i = 0; i < guests_len; i++)
42 if (strcmp(name, guests[i].name) == 0)
43 return guests + i;
44 return NULL;
45 }
46
trace_have_guests_pid(void)47 bool trace_have_guests_pid(void)
48 {
49 for (int i = 0; i < guests_len; i++) {
50 if (guests[i].pid < 0)
51 return false;
52 }
53
54 return true;
55 }
56
57 /* Find all the tasks associated with the guest pid */
find_tasks(struct trace_guest * guest)58 static void find_tasks(struct trace_guest *guest)
59 {
60 struct dirent *dent;
61 char *path;
62 DIR *dir;
63 int ret;
64 int tasks = 0;
65
66 ret = asprintf(&path, "/proc/%d/task", guest->pid);
67 if (ret < 0)
68 return;
69
70 dir = opendir(path);
71 free(path);
72 if (!dir)
73 return;
74
75 while ((dent = readdir(dir))) {
76 int *pids;
77 if (!(dent->d_type == DT_DIR && is_digits(dent->d_name)))
78 continue;
79 pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2));
80 if (!pids)
81 break;
82 pids[tasks++] = strtol(dent->d_name, NULL, 0);
83 pids[tasks] = -1;
84 guest->task_pids = pids;
85 }
86 closedir(dir);
87 }
88
89 static void find_pid_by_cid(struct trace_guest *guest);
90
add_guest(unsigned int cid,const char * name)91 static struct trace_guest *add_guest(unsigned int cid, const char *name)
92 {
93 struct trace_guest *guest;
94
95 guests = realloc(guests, (guests_len + 1) * sizeof(*guests));
96 if (!guests)
97 die("allocating new guest");
98
99 guest = &guests[guests_len++];
100
101 memset(guest, 0, sizeof(*guest));
102 guest->name = strdup(name);
103 if (!guest->name)
104 die("allocating guest name");
105 guest->cid = cid;
106 guest->pid = -1;
107
108 find_pid_by_cid(guest);
109 find_tasks(guest);
110
111 return guest;
112 }
113
/*
 * Create the "vsock_find_pid" tracing instance, enable the
 * sched:sched_waking and kvm:kvm_exit events in it and turn tracing on.
 *
 * Returns the instance, or NULL if it could not be created.
 */
static struct tracefs_instance *start_trace_connect(void)
{
	struct tracefs_instance *inst = tracefs_instance_create("vsock_find_pid");

	if (inst) {
		tracefs_event_enable(inst, "sched", "sched_waking");
		tracefs_event_enable(inst, "kvm", "kvm_exit");
		tracefs_trace_on(inst);
	}

	return inst;
}
127
/* Node of a singly linked list of PIDs */
struct pids {
	struct pids		*next;	/* Next node, NULL at the end of the list */
	int			pid;	/* PID stored in this node */
};
132
/* State shared between stop_trace_connect() and the per-event callback() */
struct trace_fields {
	struct tep_event		*sched_waking;	/* sched:sched_waking event */
	struct tep_event		*kvm_exit;	/* kvm:kvm_exit event */
	struct tep_format_field		*common_pid;	/* "common_pid" field: task that generated the event */
	struct tep_format_field		*sched_next;	/* "pid" field of sched_waking: task being woken */
	struct pids			*pids;		/* PIDs whose events are of interest */
	int				found_pid;	/* PID seen doing a kvm_exit, -1 until found */
};
141
free_pids(struct pids * pids)142 static void free_pids(struct pids *pids)
143 {
144 struct pids *next;
145
146 while (pids) {
147 next = pids;
148 pids = pids->next;
149 free(next);
150 }
151 }
152
add_pid(struct pids ** pids,int pid)153 static void add_pid(struct pids **pids, int pid)
154 {
155 struct pids *new_pid;
156
157 new_pid = malloc(sizeof(*new_pid));
158 if (!new_pid)
159 return;
160
161 new_pid->pid = pid;
162 new_pid->next = *pids;
163 *pids = new_pid;
164 }
165
match_pid(struct pids * pids,int pid)166 static bool match_pid(struct pids *pids, int pid)
167 {
168 while (pids) {
169 if (pids->pid == pid)
170 return true;
171 pids = pids->next;
172 }
173 return false;
174 }
175
callback(struct tep_event * event,struct tep_record * record,int cpu,void * data)176 static int callback(struct tep_event *event, struct tep_record *record, int cpu,
177 void *data)
178 {
179 struct trace_fields *fields = data;
180 struct tep_handle *tep = event->tep;
181 unsigned long long val;
182 int flags;
183 int type;
184 int pid;
185 int ret;
186
187 ret = tep_read_number_field(fields->common_pid, record->data, &val);
188 if (ret < 0)
189 return 0;
190
191 flags = tep_data_flags(tep, record);
192
193 /* Ignore events in interrupts */
194 if (flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
195 return 0;
196
197 /*
198 * First make sure that this event comes from a PID from
199 * this task (or a task woken by this task)
200 */
201 pid = val;
202 if (!match_pid(fields->pids, pid))
203 return 0;
204
205 type = tep_data_type(tep, record);
206
207 /*
208 * If this event is a kvm_exit, we have our PID
209 * and we can stop processing.
210 */
211 if (type == fields->kvm_exit->id) {
212 fields->found_pid = pid;
213 return -1;
214 }
215
216 if (type != fields->sched_waking->id)
217 return 0;
218
219 ret = tep_read_number_field(fields->sched_next, record->data, &val);
220 if (ret < 0)
221 return 0;
222
223 /* This is a task woken by our task or a chain of wake ups */
224 add_pid(&fields->pids, (int)val);
225 return 0;
226 }
227
/*
 * Read the thread group id of @pid from the "Tgid:" line of
 * /proc/<pid>/status.
 *
 * Returns the thread group id, or -1 if the file cannot be opened, has no
 * "Tgid:" line, or the value on that line is not a decimal number that
 * fits in a non-negative int.
 */
static int find_tgid(int pid)
{
	static const char tag[] = "Tgid:";
	const size_t tag_len = sizeof(tag) - 1;
	char path[64];
	char buf[256];
	bool line_start = true;
	int tgid = -1;
	FILE *fp;
	int len;

	len = snprintf(path, sizeof(path), "/proc/%d/status", pid);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	fp = fopen(path, "r");
	if (!fp)
		return -1;

	while (fgets(buf, sizeof(buf), fp)) {
		bool at_start = line_start;
		const char *num;
		char *end;
		long val;

		/*
		 * A chunk without a newline is a partial line, so the next
		 * chunk is its continuation and must not be matched.
		 */
		line_start = strchr(buf, '\n') != NULL;

		if (!at_start || strncmp(buf, tag, tag_len) != 0)
			continue;

		/* strtol() skips the white space that follows the colon */
		num = buf + tag_len;
		errno = 0;
		val = strtol(num, &end, 10);

		/* Do not report a value that was never really parsed */
		if (end != num && !errno && val >= 0 && val <= INT_MAX)
			tgid = val;
		break;
	}
	fclose(fp);

	return tgid;
}
266
stop_trace_connect(struct tracefs_instance * open_instance)267 static int stop_trace_connect(struct tracefs_instance *open_instance)
268 {
269 const char *systems[] = { "kvm", "sched", NULL};
270 struct tep_handle *tep;
271 struct trace_fields trace_fields;
272 int tgid = -1;
273
274 if (!open_instance)
275 return -1;
276
277 /* The connection is finished, stop tracing, we have what we want */
278 tracefs_trace_off(open_instance);
279 tracefs_event_disable(open_instance, NULL, NULL);
280
281 tep = tracefs_local_events_system(NULL, systems);
282
283 trace_fields.found_pid = -1;
284 trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
285 if (!trace_fields.sched_waking)
286 goto out;
287 trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
288 if (!trace_fields.kvm_exit)
289 goto out;
290 trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking,
291 "common_pid");
292 if (!trace_fields.common_pid)
293 goto out;
294 trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking,
295 "pid");
296 if (!trace_fields.sched_next)
297 goto out;
298
299 trace_fields.pids = NULL;
300 add_pid(&trace_fields.pids, getpid());
301 tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
302 free_pids(trace_fields.pids);
303 out:
304 tracefs_instance_destroy(open_instance);
305 tracefs_instance_free(open_instance);
306
307 if (trace_fields.found_pid > 0)
308 tgid = find_tgid(trace_fields.found_pid);
309
310 return tgid;
311 }
312
313 /*
314 * In order to find the guest that is associated to the given cid,
315 * trace the sched_waking and kvm_exit events, connect to the cid
316 * (doesn't matter what port, use -1 to not connect to anything)
317 * and find what task gets woken up from this code and calls kvm_exit,
318 * then that is the task that is running the guest.
319 * Then look at the /proc/<guest-pid>/status file to find the task group
320 * id (Tgid), and this is the PID of the task running all the threads.
321 */
find_pid_by_cid(struct trace_guest * guest)322 static void find_pid_by_cid(struct trace_guest *guest)
323 {
324 struct tracefs_instance *instance;
325 int fd;
326
327 instance = start_trace_connect();
328 fd = trace_vsock_open(guest->cid, -1);
329 guest->pid = stop_trace_connect(instance);
330 /* Just in case! */
331 if (fd >= 0)
332 close(fd);
333 }
334
/*
 * Find the guest matching @name or @cid, registering it if needed.
 *
 * A guest already known under @name (when not NULL) is returned first.
 * Otherwise, for a non-zero @cid, the guest known under that CID is
 * returned; if there is none and @name was given, a new guest is added
 * with that CID and name.
 *
 * Returns the guest, or NULL if none was found and none could be added.
 */
struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
{
	struct trace_guest *found;

	if (name) {
		found = get_guest_by_name(name);
		if (found)
			return found;
	}

	if (cid == 0)
		return NULL;

	found = get_guest_by_cid(cid);
	if (found || !name)
		return found;

	return add_guest(cid, name);
}
352
#define VM_CID_CMD		"virsh dumpxml"
#define VM_CID_LINE		"<cid auto="
#define VM_CID_ID		"address='"

/*
 * Extract the CID from one line of the "virsh dumpxml" output.
 *
 * Returns true and stores the value in @cid if @line contains both the
 * VM_CID_LINE and VM_CID_ID markers and the latter is directly followed
 * by a decimal number that fits in an unsigned int. Returns false
 * otherwise, leaving @cid untouched.
 */
static bool parse_cid_line(const char *line, unsigned int *cid)
{
	const char *num;
	unsigned long val;
	char *end;

	if (!strstr(line, VM_CID_LINE))
		return false;

	num = strstr(line, VM_CID_ID);
	if (!num)
		return false;
	num += strlen(VM_CID_ID);

	/* strtoul() alone would accept leading white space and a sign */
	if (*num < '0' || *num > '9')
		return false;

	errno = 0;
	val = strtoul(num, &end, 10);
	if (end == num || errno == ERANGE || val > UINT_MAX)
		return false;

	*cid = val;
	return true;
}

/*
 * Run "virsh dumpxml <name>" and register the guest @name with the CID
 * found in the first line of the output that describes one. Nothing is
 * registered if the command cannot be run or no CID is found.
 *
 * NOTE(review): @name is interpolated unquoted into a shell command line;
 * confirm that callers only pass names that are safe for the shell.
 */
static void read_guest_cid(char *name)
{
	char *cmd = NULL;
	char line[512];
	unsigned int cid_id;
	FILE *f;

	if (asprintf(&cmd, "%s %s", VM_CID_CMD, name) < 0)
		return;

	f = popen(cmd, "r");
	free(cmd);
	if (f == NULL)
		return;

	while (fgets(line, sizeof(line), f) != NULL) {
		if (!parse_cid_line(line, &cid_id))
			continue;
		add_guest(cid_id, name);
		break;
	}

	/* close */
	pclose(f);
}
386
#define VM_NAME_CMD		"virsh list --name"

/*
 * Run "virsh list --name" and look up the CID of every guest it lists,
 * one name per output line. Empty lines are skipped. Nothing happens if
 * the command cannot be run.
 */
void read_qemu_guests(void)
{
	char name[256];
	FILE *f;

	f = popen(VM_NAME_CMD, "r");
	if (f == NULL)
		return;

	while (fgets(name, sizeof(name), f) != NULL) {
		/*
		 * Cut the line at its newline, if any. Unlike indexing with
		 * strlen() - 1 this is also safe for an empty buffer.
		 */
		name[strcspn(name, "\n")] = '\0';
		if (name[0] == '\0')
			continue;
		read_guest_cid(name);
	}

	/* close */
	pclose(f);
}
408
get_guest_vcpu_pid(unsigned int guest_cid,unsigned int guest_vcpu)409 int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu)
410 {
411 int i;
412
413 if (!guests)
414 return -1;
415
416 for (i = 0; i < guests_len; i++) {
417 if (guests[i].cpu_pid < 0 || guest_vcpu >= guests[i].cpu_max)
418 continue;
419 if (guest_cid == guests[i].cid)
420 return guests[i].cpu_pid[guest_vcpu];
421 }
422 return -1;
423 }
424
425 /**
426 * trace_add_guest_info - Add the guest info into the trace file option
427 * @handle: The file handle that the guest info option is added to
428 * @instance: The instance that that represents the guest
429 *
430 * Adds information about the guest from the @instance into an option
431 * for the @instance. It records the trace_id, the number of CPUs,
432 * as well as the PIDs of the host that represent the CPUs.
433 */
434 void
trace_add_guest_info(struct tracecmd_output * handle,struct buffer_instance * instance)435 trace_add_guest_info(struct tracecmd_output *handle, struct buffer_instance *instance)
436 {
437 unsigned long long trace_id;
438 struct trace_guest *guest;
439 const char *name;
440 char *buf, *p;
441 int cpus;
442 int size;
443 int pid;
444 int i;
445
446 if (is_network(instance)) {
447 name = instance->name;
448 cpus = instance->cpu_count;
449 trace_id = instance->trace_id;
450 } else {
451 guest = trace_get_guest(instance->cid, NULL);
452 if (!guest)
453 return;
454 cpus = guest->cpu_max;
455 name = guest->name;
456 /*
457 * If this is a proxy, the trace_id of the guest is
458 * in the guest descriptor (added in trace_tsync_as_host().
459 */
460 if (guest->trace_id)
461 trace_id = guest->trace_id;
462 else
463 trace_id = instance->trace_id;
464 }
465
466 size = strlen(name) + 1;
467 size += sizeof(long long); /* trace_id */
468 size += sizeof(int); /* cpu count */
469 size += cpus * 2 * sizeof(int); /* cpu,pid pair */
470
471 buf = calloc(1, size);
472 if (!buf)
473 return;
474 p = buf;
475 strcpy(p, name);
476 p += strlen(name) + 1;
477
478 memcpy(p, &trace_id, sizeof(long long));
479 p += sizeof(long long);
480
481 memcpy(p, &cpus, sizeof(int));
482 p += sizeof(int);
483 for (i = 0; i < cpus; i++) {
484 if (is_network(instance))
485 pid = -1;
486 else
487 pid = guest->cpu_pid[i];
488 memcpy(p, &i, sizeof(int));
489 p += sizeof(int);
490 memcpy(p, &pid, sizeof(int));
491 p += sizeof(int);
492 }
493
494 tracecmd_add_option(handle, TRACECMD_OPTION_GUEST, size, buf);
495 free(buf);
496 }
497