// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 Google Inc, Steven Rostedt <rostedt@goodmis.org>
 * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <dirent.h>

#include "trace-local.h"
#include "trace-msg.h"

struct trace_mapping {
	struct tep_event	*kvm_entry;	/* the kvm:kvm_entry event */
	struct tep_format_field	*vcpu_id;	/* kvm_entry's vcpu_id field */
	struct tep_format_field	*common_pid;	/* kvm_entry's common_pid field */
	int			*pids;		/* guest task PIDs, terminated by a negative value */
	int			*map;		/* VCPU index to host thread PID */
	int			*vcpu;		/* sorted VCPU ids found in the kvm debugfs */
	int			max_cpus;	/* number of guest VCPUs */
};

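/* Comparator used to qsort()/bsearch() the array of VCPU ids */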
static int cmp_tmap_vcpu(const void *A, const void *B)
{
	const int *a = A;
	const int *b = B;

	if (*a < *b)
		return -1;
	return *a > *b;
}

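/*
 * Look up the guest's VCPU ids: scan <debugfs>/kvm/ for the directory whose
 * name starts with "<guest_pid>-", then collect the numbers of its "vcpuN"
 * subdirectories into tmap->vcpu, sorted. Returns 0 only if an id was found
 * for each of the guest's CPUs, -1 otherwise.
 */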
static int map_kvm_vcpus(int guest_pid, struct trace_mapping *tmap)
{
	struct dirent *entry;
	const char *debugfs;
	char *vm_dir_str = NULL;
	char *pid_file = NULL;
	char *kvm_dir;
	int pid_file_len;
	bool found = false;
	DIR *dir;
	int ret = -1;
	int i;

	tmap->vcpu = malloc(sizeof(*tmap->vcpu) * tmap->max_cpus);
	if (!tmap->vcpu)
		return -1;

	memset(tmap->vcpu, -1, sizeof(*tmap->vcpu) * tmap->max_cpus);

	debugfs = tracefs_debug_dir();
	if (!debugfs)
		return -1;

	if (asprintf(&kvm_dir, "%s/kvm", debugfs) < 0)
		return -1;

	dir = opendir(kvm_dir);
	if (!dir)
		goto out;

	if (asprintf(&pid_file, "%d-", guest_pid) <= 0)
		goto out;

	pid_file_len = strlen(pid_file);

	while ((entry = readdir(dir))) {
		if (entry->d_type != DT_DIR ||
		    strncmp(entry->d_name, pid_file, pid_file_len) != 0)
			continue;
		if (asprintf(&vm_dir_str, "%s/%s", kvm_dir, entry->d_name) < 0)
			goto out;
		found = true;
		break;
	}
	if (!found)
		goto out;

	closedir(dir);
	dir = opendir(vm_dir_str);
	if (!dir)
		goto out;
	i = 0;
	while ((entry = readdir(dir))) {
		if (entry->d_type != DT_DIR ||
		    strncmp(entry->d_name, "vcpu", 4))
			continue;
		if (i == tmap->max_cpus)
			goto out;
		tmap->vcpu[i] = strtol(entry->d_name + 4, NULL, 10);
		i++;
	}

	if (i < tmap->max_cpus)
		goto out;

	qsort(tmap->vcpu, tmap->max_cpus, sizeof(*tmap->vcpu), cmp_tmap_vcpu);

	ret = 0;

out:
	if (dir)
		closedir(dir);
	free(vm_dir_str);
	free(pid_file);
	free(kvm_dir);

	return ret;
}

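/*
 * Callback for tracefs_iterate_raw_events(): for every kvm_entry record
 * collected in the mapping instance, read its common_pid and vcpu_id
 * fields and, if the pid belongs to one of the guest's tasks, record the
 * pid as the host thread of that VCPU in tmap->map. Returns 1 once every
 * VCPU has been mapped (which ends the iteration), 0 otherwise.
 */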
static int map_vcpus(struct tep_event *event, struct tep_record *record,
		     int cpu, void *context)
{
	struct trace_mapping *tmap = context;
	unsigned long long val;
	int *vcpu;
	int type;
	int pid;
	int ret;
	int i;

	/* Do we have junk in the buffer? */
	type = tep_data_type(event->tep, record);
	if (type != tmap->kvm_entry->id)
		return 0;

	ret = tep_read_number_field(tmap->common_pid, record->data, &val);
	if (ret < 0)
		return 0;
	pid = (int)val;

	for (i = 0; tmap->pids[i] >= 0; i++) {
		if (pid == tmap->pids[i])
			break;
	}
	/* Is this thread one we care about? */
	if (tmap->pids[i] < 0)
		return 0;

	ret = tep_read_number_field(tmap->vcpu_id, record->data, &val);
	if (ret < 0)
		return 0;

	cpu = (int)val;

	vcpu = bsearch(&cpu, tmap->vcpu, tmap->max_cpus, sizeof(cpu), cmp_tmap_vcpu);
	/* Sanity check, warn? */
	if (!vcpu)
		return 0;

	cpu = vcpu - tmap->vcpu;

	/* Already have this one? Should we check if it is the same? */
	if (tmap->map[cpu] >= 0)
		return 0;

	tmap->map[cpu] = pid;

	/* Did we get them all? */
	for (i = 0; i < tmap->max_cpus; i++) {
		if (tmap->map[i] < 0)
			break;
	}

	return i == tmap->max_cpus;
}

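/*
 * Create a temporary "map_guest_pids" tracing instance, limit it to the
 * guest's task PIDs via set_event_pid, and enable the kvm:kvm_entry event
 * so that stop_mapping_vcpus() can later match each VCPU to a host thread.
 */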
static void start_mapping_vcpus(struct trace_guest *guest)
{
	char *pids = NULL;
	char *t;
	int len = 0;
	int s;
	int i;

	if (!guest->task_pids)
		return;

	guest->instance = tracefs_instance_create("map_guest_pids");
	if (!guest->instance)
		return;

	for (i = 0; guest->task_pids[i] >= 0; i++) {
		s = snprintf(NULL, 0, "%d ", guest->task_pids[i]);
		t = realloc(pids, len + s + 1);
		if (!t) {
			free(pids);
			pids = NULL;
			break;
		}
		pids = t;
		sprintf(pids + len, "%d ", guest->task_pids[i]);
		len += s;
	}
	if (pids) {
		tracefs_instance_file_write(guest->instance, "set_event_pid", pids);
		free(pids);
	}
	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "1");
}

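/*
 * Disable the kvm_entry event, parse the records collected in the mapping
 * instance to build the VCPU-to-host-PID map, attach the map to the guest
 * descriptor if it is complete, and tear down the temporary instance.
 */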
static void stop_mapping_vcpus(int cpu_count, struct trace_guest *guest)
{
	struct trace_mapping tmap = { };
	struct tep_handle *tep;
	const char *systems[] = { "kvm", NULL };
	int i;

	if (!guest->instance)
		return;

	tmap.pids = guest->task_pids;
	tmap.max_cpus = cpu_count;

	tmap.map = malloc(sizeof(*tmap.map) * tmap.max_cpus);
	if (!tmap.map)
		return;

	/* Check if the kvm vcpu mappings are the same */
	if (map_kvm_vcpus(guest->pid, &tmap) < 0)
		goto out;

	for (i = 0; i < tmap.max_cpus; i++)
		tmap.map[i] = -1;

	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "0");

	tep = tracefs_local_events_system(NULL, systems);
	if (!tep)
		goto out;

	tmap.kvm_entry = tep_find_event_by_name(tep, "kvm", "kvm_entry");
	if (!tmap.kvm_entry)
		goto out_free;

	tmap.vcpu_id = tep_find_field(tmap.kvm_entry, "vcpu_id");
	if (!tmap.vcpu_id)
		goto out_free;

	tmap.common_pid = tep_find_any_field(tmap.kvm_entry, "common_pid");
	if (!tmap.common_pid)
		goto out_free;

	tracefs_iterate_raw_events(tep, guest->instance, NULL, 0, map_vcpus, &tmap);

	for (i = 0; i < tmap.max_cpus; i++) {
		if (tmap.map[i] < 0)
			break;
	}
	/* We found all the mapped CPUs */
	if (i == tmap.max_cpus) {
		guest->cpu_pid = tmap.map;
		guest->cpu_max = tmap.max_cpus;
		tmap.map = NULL;
	}

out_free:
	tep_free(tep);
out:
	free(tmap.vcpu);
	free(tmap.map);
	tracefs_instance_destroy(guest->instance);
	tracefs_instance_free(guest->instance);
}

/**
 * trace_tsync_as_host - tsync from the host side
 * @fd: The file descriptor to the peer for tsync
 * @trace_id: The trace_id of the host
 * @loop_interval: The loop interval for tsyncs that do periodic syncs
 * @guest_id: The id for guests (negative if this is over network)
 * @guest_cpus: The number of CPUs the guest has
 * @proto_name: The protocol name to sync with
 * @clock: The clock name to use for tracing
 *
 * Start the time synchronization from the host side.
 * This will start the mapping of the virtual CPUs to host threads
 * if it is a vsocket connection (not a network).
 *
 * Returns a pointer to the tsync descriptor on success or NULL on error.
 */
struct tracecmd_time_sync *
trace_tsync_as_host(int fd, unsigned long long trace_id,
		    int loop_interval, int guest_id,
		    int guest_cpus, const char *proto_name,
		    const char *clock)
{
	struct tracecmd_time_sync *tsync;
	struct trace_guest *guest;
	int guest_pid = -1;

	if (fd < 0)
		return NULL;

	if (guest_id >= 0) {
		guest = trace_get_guest(guest_id, NULL);
		if (guest == NULL)
			return NULL;
		guest_pid = guest->pid;
		start_mapping_vcpus(guest);
	}

	tsync = tracecmd_tsync_with_guest(trace_id, loop_interval, fd,
					  guest_pid, guest_cpus, proto_name,
					  clock);

	if (guest_id >= 0)
		stop_mapping_vcpus(guest_cpus, guest);

	return tsync;
}
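
/*
 * Illustrative use only (the variable names below are hypothetical, not
 * part of this file): a host-side caller that accepted a vsocket
 * connection from a guest might start synchronization with:
 *
 *	tsync = trace_tsync_as_host(vsock_fd, host_trace_id, 0,
 *				    guest_id, guest_cpus,
 *				    proto_name, clock_name);
 *	if (!tsync)
 *		warning("failed to start timestamp synchronization");
 */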

/**
 * trace_tsync_as_guest - tsync from the guest side
 * @fd: The file descriptor to the peer for tsync
 * @tsync_proto: The protocol name to sync with
 * @clock: The clock name to use for tracing
 * @remote_id: The id to differentiate the remote server with
 * @local_id: The id to differentiate the local machine with
 *
 * Start the time synchronization from the guest side.
 *
 * Returns a pointer to the tsync descriptor on success or NULL on error.
 */
struct tracecmd_time_sync *
trace_tsync_as_guest(int fd, const char *tsync_proto, const char *clock,
		     unsigned int remote_id, unsigned int local_id)
{
	struct tracecmd_time_sync *tsync = NULL;

	if (fd < 0)
		return NULL;

	tsync = tracecmd_tsync_with_host(fd, tsync_proto,
					 clock, remote_id, local_id);
	if (!tsync) {
		warning("Failed to negotiate timestamp synchronization with the host");
		return NULL;
	}

	return tsync;
}
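
/*
 * Illustrative use only (the variable names below are hypothetical): a
 * guest agent connected to the host over a vsocket could negotiate
 * timestamp synchronization with:
 *
 *	tsync = trace_tsync_as_guest(vsock_fd, tsync_proto, clock_name,
 *				     remote_id, local_id);
 */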