1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2024 Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/arm-smccc.h>
8 #include <linux/percpu-defs.h>
9 #include <linux/panic_notifier.h>
10 #include <linux/trace_events.h>
11 #include <linux/tracefs.h>
12 
13 #include <asm/kvm_host.h>
14 #include <asm/kvm_hyptrace.h>
15 #include <asm/kvm_hypevents_defs.h>
16 #include <asm/kvm_pkvm.h>
17 
18 #include "hyp_constants.h"
19 #include "hyp_trace.h"
20 
21 #define RB_POLL_MS 100
22 
23 /* Same 10 min max conversion time the clocksource code uses when the counter width is more than 32 bits */
24 #define CLOCK_MAX_CONVERSION_S 600
25 #define CLOCK_INIT_MS 100
26 #define CLOCK_POLL_MS 500
27 
28 #define TRACEFS_DIR "hypervisor"
29 #define TRACEFS_MODE_WRITE 0640
30 #define TRACEFS_MODE_READ 0440
31 
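/*
 * Hyp timestamps are raw arch counter cycles. The host periodically derives
 * a (mult, shift) pair plus an epoch (boot, cycles) such that, roughly:
 *
 *	boot_ns ~= boot + (((cycles_now - cycles) * mult) >> shift)
 *
 * and pushes them via __pkvm_update_clock_tracing(); the hypervisor
 * presumably applies the same conversion when stamping its events. See
 * __hyp_clock_work() below for the host-side maths.
 */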
32 struct hyp_trace_clock {
33 	u64			cycles;
34 	u64			max_delta;
35 	u64			boot;
36 	u32			mult;
37 	u32			shift;
38 	struct delayed_work	work;
39 	struct completion	ready;
40 };
41 
42 static struct hyp_trace_buffer {
43 	struct hyp_trace_desc		*desc;
44 	struct ring_buffer_writer	writer;
45 	struct trace_buffer		*trace_buffer;
46 	size_t				desc_size;
47 	bool				tracing_on;
48 	int				nr_readers;
49 	struct mutex			lock;
50 	struct hyp_trace_clock		clock;
51 	struct ht_iterator		*printk_iter;
52 	bool				printk_on;
53 } hyp_trace_buffer = {
54 	.lock		= __MUTEX_INITIALIZER(hyp_trace_buffer.lock),
55 };
56 
57 static size_t hyp_trace_buffer_size = 7 << 10;
58 
59 static bool hyp_trace_panic __read_mostly;
60 
61 /* Number of pages the ring buffer requires to accommodate a buffer of the given size */
62 #define NR_PAGES(size) \
63 	((PAGE_ALIGN(size) >> PAGE_SHIFT) + 1)
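/*
 * e.g. assuming 4K pages and the default 7K buffer: PAGE_ALIGN(7K) = 8K,
 * i.e. 2 data pages, plus one extra page per CPU -- most likely the page
 * handed back and forth by __pkvm_swap_reader_tracing().
 */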
64 
65 static inline bool hyp_trace_buffer_loaded(struct hyp_trace_buffer *hyp_buffer)
66 {
67 	return !!hyp_buffer->trace_buffer;
68 }
69 
70 static inline bool hyp_trace_buffer_used(struct hyp_trace_buffer *hyp_buffer)
71 {
72 	return hyp_buffer->nr_readers || hyp_buffer->tracing_on ||
73 		!ring_buffer_empty(hyp_buffer->trace_buffer);
74 }
75 
76 static int set_ht_printk_on(char *str)
77 {
78 	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
79 		hyp_trace_buffer.printk_on = true;
80 
81 	return 1;
82 }
83 __setup("hyp_trace_printk", set_ht_printk_on);
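/*
 * Command-line usage (following the usual __setup() semantics): booting
 * with "hyp_trace_printk" or "hyp_trace_printk=1" mirrors hyp trace events
 * to the kernel log; only "hyp_trace_printk=0" or "hyp_trace_printk=off"
 * keep it disabled.
 */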
84 
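/*
 * Clock resync worker: first run CLOCK_INIT_MS after start, then every
 * CLOCK_POLL_MS. It re-derives (mult, shift) whenever the converted hyp
 * clock drifts from the kernel boot clock, and refreshes the epoch once
 * delta_cycles reaches max_delta, which is sized so that
 * delta_cycles * mult stays below half of U64_MAX.
 */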
85 static void __hyp_clock_work(struct work_struct *work)
86 {
87 	struct delayed_work *dwork = to_delayed_work(work);
88 	struct hyp_trace_buffer *hyp_buffer;
89 	struct hyp_trace_clock *hyp_clock;
90 	struct system_time_snapshot snap;
91 	u64 rate, delta_cycles;
92 	u64 boot, delta_boot;
93 	u64 err = 0;
94 
95 	hyp_clock = container_of(dwork, struct hyp_trace_clock, work);
96 	hyp_buffer = container_of(hyp_clock, struct hyp_trace_buffer, clock);
97 
98 	ktime_get_snapshot(&snap);
99 	boot = ktime_to_ns(snap.boot);
100 
101 	delta_boot = boot - hyp_clock->boot;
102 	delta_cycles = snap.cycles - hyp_clock->cycles;
103 
104 	/* Compare hyp clock with the kernel boot clock */
105 	if (hyp_clock->mult) {
106 		u64 cur = delta_cycles;
107 
108 		cur *= hyp_clock->mult;
109 		cur >>= hyp_clock->shift;
110 		cur += hyp_clock->boot;
111 
112 		err = abs_diff(cur, boot);
113 
114 		/* No deviation, only update epoch if necessary */
115 		if (!err) {
116 			if (delta_cycles >= hyp_clock->max_delta)
117 				goto update_hyp;
118 
119 			goto resched;
120 		}
121 
122 		/* Warn if the error is above tracing precision (1us) */
123 		if (hyp_buffer->tracing_on && err > NSEC_PER_USEC)
124 			pr_warn_ratelimited("hyp trace clock off by %lluus\n",
125 					    err / NSEC_PER_USEC);
126 	}
127 
128 	rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);
129 	clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
130 			       rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);
131 
132 update_hyp:
133 	hyp_clock->max_delta = (U64_MAX / hyp_clock->mult) >> 1;
134 	hyp_clock->cycles = snap.cycles;
135 	hyp_clock->boot = boot;
136 	kvm_call_hyp_nvhe(__pkvm_update_clock_tracing, hyp_clock->mult,
137 			  hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
138 	complete(&hyp_clock->ready);
139 
140 	pr_debug("hyp trace clock update mult=%u shift=%u max_delta=%llu err=%llu\n",
141 		 hyp_clock->mult, hyp_clock->shift, hyp_clock->max_delta, err);
142 
143 resched:
144 	schedule_delayed_work(&hyp_clock->work,
145 			      msecs_to_jiffies(CLOCK_POLL_MS));
146 }
147 
148 static void hyp_clock_start(struct hyp_trace_buffer *hyp_buffer)
149 {
150 	struct hyp_trace_clock *hyp_clock = &hyp_buffer->clock;
151 	struct system_time_snapshot snap;
152 
153 	ktime_get_snapshot(&snap);
154 
155 	hyp_clock->boot = ktime_to_ns(snap.boot);
156 	hyp_clock->cycles = snap.cycles;
157 	hyp_clock->mult = 0;
158 
159 	init_completion(&hyp_clock->ready);
160 	INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
161 	schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
162 }
163 
164 static void hyp_clock_stop(struct hyp_trace_buffer *hyp_buffer)
165 {
166 	struct hyp_trace_clock *hyp_clock = &hyp_buffer->clock;
167 
168 	cancel_delayed_work_sync(&hyp_clock->work);
169 }
170 
171 static void hyp_clock_wait(struct hyp_trace_buffer *hyp_buffer)
172 {
173 	struct hyp_trace_clock *hyp_clock = &hyp_buffer->clock;
174 
175 	wait_for_completion(&hyp_clock->ready);
176 }
177 
178 static int __get_reader_page(int cpu)
179 {
180 	/* We'd better not try to call the hyp if it has panicked */
181 	if (hyp_trace_panic)
182 		return 0;
183 
184 	return kvm_call_hyp_nvhe(__pkvm_swap_reader_tracing, cpu);
185 }
186 
187 static int __reset(int cpu)
188 {
189 	return kvm_call_hyp_nvhe(__pkvm_reset_tracing, cpu);
190 }
191 
192 static void hyp_trace_free_pages(struct hyp_trace_desc *desc)
193 {
194 	struct rb_page_desc *rb_desc;
195 	int cpu, id;
196 
197 	for_each_rb_page_desc(rb_desc, cpu, &desc->page_desc) {
198 		free_page(rb_desc->meta_va);
199 		for (id = 0; id < rb_desc->nr_page_va; id++)
200 			free_page(rb_desc->page_va[id]);
201 	}
202 }
203 
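/*
 * For each possible CPU, allocate one meta page plus NR_PAGES(size) data
 * pages and record their kernel VAs in the rb_page_desc entries packed
 * after trace_page_desc::__data. hyp_trace_load_pages() later shares
 * these very pages with the hypervisor.
 */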
204 static int hyp_trace_alloc_pages(struct hyp_trace_desc *desc, size_t size)
205 {
206 	int err = 0, cpu, id, nr_pages = NR_PAGES(size);
207 	struct trace_page_desc *trace_desc;
208 	struct rb_page_desc *rb_desc;
209 
210 	trace_desc = &desc->page_desc;
211 	trace_desc->nr_cpus = 0;
212 
213 	rb_desc = (struct rb_page_desc *)&trace_desc->__data[0];
214 
215 	for_each_possible_cpu(cpu) {
216 		rb_desc->cpu = cpu;
217 		rb_desc->nr_page_va = 0;
218 		rb_desc->meta_va = (unsigned long)page_to_virt(alloc_page(GFP_KERNEL));
219 		if (!rb_desc->meta_va) {
220 			err = -ENOMEM;
221 			break;
222 		}
223 		for (id = 0; id < nr_pages; id++) {
224 			rb_desc->page_va[id] = (unsigned long)page_to_virt(alloc_page(GFP_KERNEL));
225 			if (!rb_desc->page_va[id]) {
226 				err = -ENOMEM;
227 				break;
228 			}
229 			rb_desc->nr_page_va++;
230 		}
231 		trace_desc->nr_cpus++;
232 		rb_desc = __next_rb_page_desc(rb_desc);
233 	}
234 
235 	if (err) {
236 		hyp_trace_free_pages(desc);
237 		return err;
238 	}
239 
240 	return 0;
241 }
242 
243 static int __load_page(unsigned long va)
244 {
245 	return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn((void *)va), 1);
246 }
247 
248 static void __teardown_page(unsigned long va)
249 {
250 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, virt_to_pfn((void *)va), 1));
251 }
252 
253 static void hyp_trace_teardown_pages(struct hyp_trace_desc *desc,
254 				     int last_cpu)
255 {
256 	struct rb_page_desc *rb_desc;
257 	int cpu, id;
258 
259 	for_each_rb_page_desc(rb_desc, cpu, &desc->page_desc) {
260 		if (cpu > last_cpu)
261 			break;
262 		__teardown_page(rb_desc->meta_va);
263 		for (id = 0; id < rb_desc->nr_page_va; id++)
264 			__teardown_page(rb_desc->page_va[id]);
265 	}
266 }
267 
268 static int hyp_trace_load_pages(struct hyp_trace_desc *desc)
269 {
270 	int last_loaded_cpu = 0, cpu, id, err = -EINVAL;
271 	struct rb_page_desc *rb_desc;
272 
273 	for_each_rb_page_desc(rb_desc, cpu, &desc->page_desc) {
274 		err = __load_page(rb_desc->meta_va);
275 		if (err)
276 			break;
277 
278 		for (id = 0; id < rb_desc->nr_page_va; id++) {
279 			err = __load_page(rb_desc->page_va[id]);
280 			if (err)
281 				break;
282 		}
283 
284 		if (!err)
285 			continue;
286 
287 		for (id--; id >= 0; id--)
288 			__teardown_page(rb_desc->page_va[id]);
289 
290 		last_loaded_cpu = cpu - 1;
291 
292 		break;
293 	}
294 
295 	if (!err)
296 		return 0;
297 
298 	hyp_trace_teardown_pages(desc, last_loaded_cpu);
299 
300 	return err;
301 }
302 
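/*
 * Load sequence: build the page-aligned descriptor, allocate the per-CPU
 * pages, share them with the hypervisor (__pkvm_host_share_hyp), hand the
 * descriptor over with __pkvm_load_tracing() and finally attach a
 * kernel-side reader with ring_buffer_reader(). Errors unwind in reverse
 * order.
 */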
303 static int hyp_trace_buffer_load(struct hyp_trace_buffer *hyp_buffer, size_t size)
304 {
305 	int ret, nr_pages = NR_PAGES(size);
306 	struct rb_page_desc *rbdesc;
307 	struct hyp_trace_desc *desc;
308 	size_t desc_size;
309 
310 	if (hyp_trace_buffer_loaded(hyp_buffer))
311 		return 0;
312 
313 	desc_size = size_add(offsetof(struct hyp_trace_desc, page_desc),
314 			     offsetof(struct trace_page_desc, __data));
315 	desc_size = size_add(desc_size,
316 			     size_mul(num_possible_cpus(),
317 				      struct_size(rbdesc, page_va, nr_pages)));
318 	if (desc_size == SIZE_MAX)
319 		return -E2BIG;
320 
321 	/*
322 	 * The hypervisor will unmap the descriptor from the host while it
323 	 * reads it. Allocating at page granularity ensures no other useful
324 	 * data is unmapped along with it.
325 	 */
326 	desc_size = PAGE_ALIGN(desc_size);
327 
328 	desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
329 	if (!desc)
330 		return -ENOMEM;
331 
332 	ret = hyp_trace_alloc_pages(desc, size);
333 	if (ret)
334 		goto err_free_desc;
335 
336 	ret = hyp_trace_load_pages(desc);
337 	if (ret)
338 		goto err_free_pages;
339 
340 	ret = kvm_call_refill_hyp_nvhe(__pkvm_load_tracing, (unsigned long)desc,
341 				       desc_size);
342 	if (ret)
343 		goto err_teardown_pages;
344 
345 	hyp_buffer->writer.pdesc = &desc->page_desc;
346 	hyp_buffer->writer.get_reader_page = __get_reader_page;
347 	hyp_buffer->writer.reset = __reset;
348 	hyp_buffer->trace_buffer = ring_buffer_reader(&hyp_buffer->writer);
349 	if (!hyp_buffer->trace_buffer) {
350 		ret = -ENOMEM;
351 		goto err_teardown_tracing;
352 	}
353 
354 	hyp_buffer->desc = desc;
355 	hyp_buffer->desc_size = desc_size;
356 
357 	return 0;
358 
359 err_teardown_tracing:
360 	kvm_call_hyp_nvhe(__pkvm_teardown_tracing);
361 err_teardown_pages:
362 	hyp_trace_teardown_pages(desc, INT_MAX);
363 err_free_pages:
364 	hyp_trace_free_pages(desc);
365 err_free_desc:
366 	free_pages_exact(desc, desc_size);
367 
368 	return ret;
369 }
370 
371 static void hyp_trace_buffer_teardown(struct hyp_trace_buffer *hyp_buffer)
372 {
373 	struct hyp_trace_desc *desc = hyp_buffer->desc;
374 	size_t desc_size = hyp_buffer->desc_size;
375 
376 	if (!hyp_trace_buffer_loaded(hyp_buffer))
377 		return;
378 
379 	if (hyp_trace_buffer_used(hyp_buffer))
380 		return;
381 
382 	if (kvm_call_hyp_nvhe(__pkvm_teardown_tracing))
383 		return;
384 
385 	ring_buffer_free(hyp_buffer->trace_buffer);
386 	hyp_trace_teardown_pages(desc, INT_MAX);
387 	hyp_trace_free_pages(desc);
388 	free_pages_exact(desc, desc_size);
389 	hyp_buffer->trace_buffer = NULL;
390 }
391 
392 static int hyp_trace_start(void)
393 {
394 	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
395 	int ret = 0;
396 
397 	mutex_lock(&hyp_buffer->lock);
398 
399 	if (hyp_buffer->tracing_on)
400 		goto out;
401 
402 	hyp_clock_start(hyp_buffer);
403 
404 	ret = hyp_trace_buffer_load(hyp_buffer, hyp_trace_buffer_size);
405 	if (ret)
406 		goto out;
407 
408 	hyp_clock_wait(hyp_buffer);
409 
410 	ret = kvm_call_hyp_nvhe(__pkvm_enable_tracing, true);
411 	if (ret) {
412 		hyp_trace_buffer_teardown(hyp_buffer);
413 		goto out;
414 	}
415 
416 	hyp_buffer->tracing_on = true;
417 
418 out:
419 	if (!hyp_buffer->tracing_on)
420 		hyp_clock_stop(hyp_buffer);
421 
422 	mutex_unlock(&hyp_buffer->lock);
423 
424 	return ret;
425 }
426 
427 static void hyp_trace_stop(void)
428 {
429 	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
430 	int ret;
431 
432 	mutex_lock(&hyp_buffer->lock);
433 
434 	if (!hyp_buffer->tracing_on)
435 		goto end;
436 
437 	ret = kvm_call_hyp_nvhe(__pkvm_enable_tracing, false);
438 	if (!ret) {
439 		hyp_clock_stop(hyp_buffer);
440 		ring_buffer_poll_writer(hyp_buffer->trace_buffer,
441 					RING_BUFFER_ALL_CPUS);
442 		hyp_buffer->tracing_on = false;
443 		hyp_trace_buffer_teardown(hyp_buffer);
444 	}
445 
446 end:
447 	mutex_unlock(&hyp_buffer->lock);
448 }
449 
450 static ssize_t hyp_tracing_on(struct file *filp, const char __user *ubuf,
451 			      size_t cnt, loff_t *ppos)
452 {
453 	unsigned long val;
454 	int ret;
455 
456 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
457 	if (ret)
458 		return ret;
459 
460 	if (val)
461 		ret = hyp_trace_start();
462 	else
463 		hyp_trace_stop();
464 
465 	return ret ? ret : cnt;
466 }
467 
468 static ssize_t hyp_tracing_on_read(struct file *filp, char __user *ubuf,
469 				   size_t cnt, loff_t *ppos)
470 {
471 	char buf[3];
472 	int r;
473 
474 	mutex_lock(&hyp_trace_buffer.lock);
475 	r = sprintf(buf, "%d\n", hyp_trace_buffer.tracing_on);
476 	mutex_unlock(&hyp_trace_buffer.lock);
477 
478 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
479 }
480 
481 static const struct file_operations hyp_tracing_on_fops = {
482 	.write	= hyp_tracing_on,
483 	.read	= hyp_tracing_on_read,
484 };
485 
486 static ssize_t hyp_buffer_size(struct file *filp, const char __user *ubuf,
487 			       size_t cnt, loff_t *ppos)
488 {
489 	unsigned long val;
490 	int ret;
491 
492 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
493 	if (ret)
494 		return ret;
495 
496 	if (!val)
497 		return -EINVAL;
498 
499 	mutex_lock(&hyp_trace_buffer.lock);
500 	hyp_trace_buffer_size = val << 10; /* KB to B */
501 	mutex_unlock(&hyp_trace_buffer.lock);
502 
503 	return cnt;
504 }
505 
506 static ssize_t hyp_buffer_size_read(struct file *filp, char __user *ubuf,
507 				    size_t cnt, loff_t *ppos)
508 {
509 	char buf[64];
510 	int r;
511 
512 	mutex_lock(&hyp_trace_buffer.lock);
513 	r = sprintf(buf, "%lu (%s)\n", hyp_trace_buffer_size >> 10,
514 		    hyp_trace_buffer_loaded(&hyp_trace_buffer) ?
515 			"loaded" : "unloaded");
516 	mutex_unlock(&hyp_trace_buffer.lock);
517 
518 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
519 }
520 
521 static const struct file_operations hyp_buffer_size_fops = {
522 	.write	= hyp_buffer_size,
523 	.read	= hyp_buffer_size_read,
524 };
525 
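/*
 * Render the event timestamp as seconds.microseconds: e.g. a 12345678901ns
 * timestamp prints as "12.345678: " (the sub-microsecond remainder of the
 * first do_div() is discarded).
 */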
526 static void ht_print_trace_time(struct ht_iterator *iter)
527 {
528 	unsigned long usecs_rem;
529 	u64 ts_ns = iter->ts;
530 
531 	do_div(ts_ns, 1000);
532 	usecs_rem = do_div(ts_ns, USEC_PER_SEC);
533 
534 	trace_seq_printf(&iter->seq, "%5lu.%06lu: ",
535 			 (unsigned long)ts_ns, usecs_rem);
536 }
537 
538 static void ht_print_trace_cpu(struct ht_iterator *iter)
539 {
540 	trace_seq_printf(&iter->seq, "[%03d]\t", iter->ent_cpu);
541 }
542 
543 static int ht_print_trace_fmt(struct ht_iterator *iter)
544 {
545 	struct hyp_event *e;
546 
547 	if (iter->lost_events)
548 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
549 				 iter->ent_cpu, iter->lost_events);
550 
551 	ht_print_trace_cpu(iter);
552 	ht_print_trace_time(iter);
553 
554 	e = hyp_trace_find_event(iter->ent->id);
555 	if (e)
556 		e->trace_func(iter);
557 	else
558 		trace_seq_printf(&iter->seq, "Unknown event id %d\n", iter->ent->id);
559 
560 	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
561 }
562 
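/*
 * Peek, without consuming, the next event for this iterator: the head of a
 * single per-CPU buffer, or, for RING_BUFFER_ALL_CPUS, the oldest event
 * (smallest timestamp) across all CPUs.
 */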
563 static struct ring_buffer_event *__ht_next_pipe_event(struct ht_iterator *iter)
564 {
565 	struct trace_buffer *trace_buffer = iter->hyp_buffer->trace_buffer;
566 	struct ring_buffer_event *evt = NULL;
567 	int cpu = iter->cpu;
568 
569 	if (cpu != RING_BUFFER_ALL_CPUS) {
570 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
571 			return NULL;
572 
573 		iter->ent_cpu = cpu;
574 
575 		return ring_buffer_peek(trace_buffer, cpu, &iter->ts,
576 					&iter->lost_events);
577 	}
578 
579 	iter->ts = LLONG_MAX;
580 	for_each_possible_cpu(cpu) {
581 		struct ring_buffer_event *_evt;
582 		unsigned long lost_events;
583 		u64 ts;
584 
585 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
586 			continue;
587 
588 		_evt = ring_buffer_peek(trace_buffer, cpu, &ts,
589 					&lost_events);
590 		if (!_evt)
591 			continue;
592 
593 		if (ts >= iter->ts)
594 			continue;
595 
596 		iter->ts = ts;
597 		iter->ent_cpu = cpu;
598 		iter->lost_events = lost_events;
599 		evt = _evt;
600 	}
601 
602 	return evt;
603 }
604 
605 static void *ht_next_pipe_event(struct ht_iterator *iter)
606 {
607 	struct ring_buffer_event *event;
608 
609 	event = __ht_next_pipe_event(iter);
610 	if (!event)
611 		return NULL;
612 
613 	iter->ent = (struct hyp_entry_hdr *)&event->array[1];
614 	iter->ent_size = event->array[0];
615 
616 	return iter;
617 }
618 
619 static ssize_t
620 hyp_trace_pipe_read(struct file *file, char __user *ubuf,
621 		    size_t cnt, loff_t *ppos)
622 {
623 	struct ht_iterator *iter = (struct ht_iterator *)file->private_data;
624 	struct trace_buffer *trace_buffer = iter->hyp_buffer->trace_buffer;
625 	int ret;
626 
627 copy_to_user:
628 	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
629 	if (ret != -EBUSY)
630 		return ret;
631 
632 	trace_seq_init(&iter->seq);
633 
634 	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
635 	if (ret < 0)
636 		return ret;
637 
638 	while (ht_next_pipe_event(iter)) {
639 		int prev_len = iter->seq.seq.len;
640 
641 		if (ht_print_trace_fmt(iter)) {
642 			iter->seq.seq.len = prev_len;
643 			break;
644 		}
645 
646 		ring_buffer_consume(trace_buffer, iter->ent_cpu, NULL, NULL);
647 	}
648 
649 	goto copy_to_user;
650 }
651 
652 static void hyp_trace_buffer_printk(struct hyp_trace_buffer *hyp_buffer);
653 
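/*
 * Self-rearming worker (every RB_POLL_MS): poll the hypervisor writer so
 * newly written pages become visible to the kernel-side reader, and mirror
 * them to the kernel log when hyp_trace_printk is enabled.
 */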
654 static void __poll_writer(struct work_struct *work)
655 {
656 	struct delayed_work *dwork = to_delayed_work(work);
657 	struct ht_iterator *iter;
658 
659 	iter = container_of(dwork, struct ht_iterator, poll_work);
660 
661 	ring_buffer_poll_writer(iter->hyp_buffer->trace_buffer, iter->cpu);
662 
663 	hyp_trace_buffer_printk(iter->hyp_buffer);
664 
665 	schedule_delayed_work((struct delayed_work *)work,
666 			      msecs_to_jiffies(RB_POLL_MS));
667 }
668 
669 static struct ht_iterator *
670 ht_iterator_create(struct hyp_trace_buffer *hyp_buffer, int cpu)
671 {
672 	struct ht_iterator *iter = NULL;
673 	int ret;
674 
675 	WARN_ON(!mutex_is_locked(&hyp_buffer->lock));
676 
677 	if (hyp_buffer->nr_readers == INT_MAX) {
678 		ret = -EBUSY;
679 		goto unlock;
680 	}
681 
682 	ret = hyp_trace_buffer_load(hyp_buffer, hyp_trace_buffer_size);
683 	if (ret)
684 		goto unlock;
685 
686 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
687 	if (!iter) {
688 		ret = -ENOMEM;
689 		goto unlock;
690 	}
691 	iter->hyp_buffer = hyp_buffer;
692 	iter->cpu = cpu;
693 	trace_seq_init(&iter->seq);
694 
695 	ret = ring_buffer_poll_writer(hyp_buffer->trace_buffer, cpu);
696 	if (ret)
697 		goto unlock;
698 
699 	INIT_DELAYED_WORK(&iter->poll_work, __poll_writer);
700 	schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(RB_POLL_MS));
701 
702 	hyp_buffer->nr_readers++;
703 
704 unlock:
705 	if (ret) {
706 		hyp_trace_buffer_teardown(hyp_buffer);
707 		kfree(iter);
708 		iter = NULL;
709 	}
710 
711 	return iter;
712 }
713 
714 static int hyp_trace_pipe_open(struct inode *inode, struct file *file)
715 {
716 	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
717 	int cpu = (s64)inode->i_private;
718 
719 	mutex_lock(&hyp_buffer->lock);
720 
721 	file->private_data = ht_iterator_create(hyp_buffer, cpu);
722 
723 	mutex_unlock(&hyp_buffer->lock);
724 
725 	return file->private_data ? 0 : -EINVAL;
726 }
727 
728 static int hyp_trace_pipe_release(struct inode *inode, struct file *file)
729 {
730 	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
731 	struct ht_iterator *iter = file->private_data;
732 
733 	cancel_delayed_work_sync(&iter->poll_work);
734 
735 	mutex_lock(&hyp_buffer->lock);
736 
737 	WARN_ON(--hyp_buffer->nr_readers < 0);
738 
739 	hyp_trace_buffer_teardown(hyp_buffer);
740 
741 	mutex_unlock(&hyp_buffer->lock);
742 
743 	kfree(iter);
744 
745 	return 0;
746 }
747 
748 static const struct file_operations hyp_trace_pipe_fops = {
749 	.open           = hyp_trace_pipe_open,
750 	.read           = hyp_trace_pipe_read,
751 	.release        = hyp_trace_pipe_release,
752 };
753 
754 static ssize_t
755 hyp_trace_raw_read(struct file *file, char __user *ubuf,
756 		   size_t cnt, loff_t *ppos)
757 {
758 	struct ht_iterator *iter = (struct ht_iterator *)file->private_data;
759 	size_t size;
760 	int ret;
761 	void *page_data;
762 
763 	if (iter->copy_leftover)
764 		goto read;
765 
766 again:
767 	ret = ring_buffer_read_page(iter->hyp_buffer->trace_buffer,
768 				    (struct buffer_data_read_page *)iter->spare,
769 				    cnt, iter->cpu, 0);
770 	if (ret < 0) {
771 		if (!ring_buffer_empty_cpu(iter->hyp_buffer->trace_buffer,
772 					   iter->cpu))
773 			return 0;
774 
775 		ret = ring_buffer_wait(iter->hyp_buffer->trace_buffer,
776 				       iter->cpu, 0, NULL, NULL);
777 		if (ret < 0)
778 			return ret;
779 
780 		goto again;
781 	}
782 
783 	iter->copy_leftover = 0;
784 
785 read:
786 	size = PAGE_SIZE - iter->copy_leftover;
787 	if (size > cnt)
788 		size = cnt;
789 
790 	page_data = ring_buffer_read_page_data(
791 		(struct buffer_data_read_page *)iter->spare);
792 	ret = copy_to_user(ubuf, page_data + PAGE_SIZE - size, size);
793 	if (ret == size)
794 		return -EFAULT;
795 
796 	size -= ret;
797 	*ppos += size;
798 	iter->copy_leftover = ret;
799 
800 	return size;
801 }
802 
803 static int hyp_trace_raw_open(struct inode *inode, struct file *file)
804 {
805 	int ret = hyp_trace_pipe_open(inode, file);
806 	struct ht_iterator *iter;
807 
808 	if (ret)
809 		return ret;
810 
811 	iter = file->private_data;
812 	iter->spare = ring_buffer_alloc_read_page(iter->hyp_buffer->trace_buffer,
813 						  iter->cpu);
814 	if (IS_ERR(iter->spare)) {
815 		ret = PTR_ERR(iter->spare);
816 		iter->spare = NULL;
817 		return ret;
818 	}
819 
820 	return 0;
821 }
822 
823 static int hyp_trace_raw_release(struct inode *inode, struct file *file)
824 {
825 	struct ht_iterator *iter = file->private_data;
826 
827 	ring_buffer_free_read_page(iter->hyp_buffer->trace_buffer, iter->cpu,
828 				   iter->spare);
829 
830 	return hyp_trace_pipe_release(inode, file);
831 }
832 
833 static const struct file_operations hyp_trace_raw_fops = {
834 	.open           = hyp_trace_raw_open,
835 	.read           = hyp_trace_raw_read,
836 	.release        = hyp_trace_raw_release,
837 };
838 
839 static void hyp_trace_reset(int cpu)
840 {
841 	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
842 
843 	mutex_lock(&hyp_buffer->lock);
844 
845 	if (!hyp_trace_buffer_loaded(hyp_buffer))
846 		goto out;
847 
848 	if (cpu == RING_BUFFER_ALL_CPUS)
849 		ring_buffer_reset(hyp_buffer->trace_buffer);
850 	else
851 		ring_buffer_reset_cpu(hyp_buffer->trace_buffer, cpu);
852 
853 out:
854 	mutex_unlock(&hyp_buffer->lock);
855 }
856 
857 static int hyp_trace_open(struct inode *inode, struct file *file)
858 {
859 	int cpu = (s64)inode->i_private;
860 
861 	if (file->f_mode & FMODE_WRITE)
862 		hyp_trace_reset(cpu);
863 
864 	return 0;
865 }
866 
867 static ssize_t hyp_trace_write(struct file *filp, const char __user *ubuf,
868 			       size_t count, loff_t *ppos)
869 {
870 	return count;
871 }
872 
873 static const struct file_operations hyp_trace_fops = {
874 	.open           = hyp_trace_open,
875 	.write          = hyp_trace_write,
876 	.release        = NULL,
877 };
878 
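/*
 * The hyp clock is correlated against the kernel boot clock (see
 * hyp_trace_clock above), so "boot" is the only clock reported and the
 * file is read-only.
 */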
879 static int hyp_trace_clock_show(struct seq_file *m, void *v)
880 {
881 	seq_puts(m, "[boot]\n");
882 
883 	return 0;
884 }
885 DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock);
886 
887 #ifdef CONFIG_PKVM_SELFTESTS
888 static int selftest_event_open(struct inode *inode, struct file *file)
889 {
890 	if (file->f_mode & FMODE_WRITE)
891 		return kvm_call_hyp_nvhe(__pkvm_selftest_event);
892 
893 	return 0;
894 }
895 
896 static ssize_t selftest_event_write(struct file *f, const char __user *buf,
897 				    size_t cnt, loff_t *pos)
898 {
899 	return cnt;
900 }
901 
902 static const struct file_operations selftest_event_fops = {
903 	.open	= selftest_event_open,
904 	.write	= selftest_event_write,
905 };
906 
907 static void hyp_trace_init_testing_tracefs(struct dentry *root)
908 {
909 	tracefs_create_file("selftest_event", TRACEFS_MODE_WRITE, root, NULL,
910 			    &selftest_event_fops);
911 }
912 #else
913 static void hyp_trace_init_testing_tracefs(struct dentry *root) { }
914 #endif
915 
916 static int hyp_trace_buffer_printk_init(struct hyp_trace_buffer *hyp_buffer)
917 {
918 	int ret = 0;
919 
920 	mutex_lock(&hyp_buffer->lock);
921 
922 	if (hyp_buffer->printk_iter)
923 		goto unlock;
924 
925 	hyp_buffer->printk_iter = ht_iterator_create(hyp_buffer,
926 						     RING_BUFFER_ALL_CPUS);
927 	if (!hyp_buffer->printk_iter)
928 		ret = -EINVAL;
929 unlock:
930 	mutex_unlock(&hyp_buffer->lock);
931 
932 	return ret;
933 }
934 
935 static void hyp_trace_buffer_printk(struct hyp_trace_buffer *hyp_buffer)
936 {
937 	struct ht_iterator *ht_iter = hyp_buffer->printk_iter;
938 
939 	if (!hyp_trace_buffer.printk_on)
940 		return;
941 
942 	trace_seq_init(&ht_iter->seq);
943 	while (ht_next_pipe_event(ht_iter)) {
944 		ht_print_trace_fmt(ht_iter);
945 
946 		/* Nothing has been written in the seq_buf */
947 		if (!ht_iter->seq.seq.len)
948 			return;
949 
950 		ht_iter->seq.buffer[ht_iter->seq.seq.len] = '\0';
951 		printk("%s", ht_iter->seq.buffer);
952 
953 		ht_iter->seq.seq.len = 0;
954 		ring_buffer_consume(hyp_buffer->trace_buffer, ht_iter->ent_cpu,
955 				    NULL, NULL);
956 	}
957 }
958 
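/*
 * With CONFIG_PKVM_DUMP_TRACE_ON_PANIC, flush whatever is left in the hyp
 * buffer to the console on panic. When the panic originates from the
 * hypervisor itself ("HYP panic:"), hyp_trace_panic makes sure
 * __get_reader_page() stops issuing hypercalls.
 */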
959 static int hyp_trace_panic_handler(struct notifier_block *self,
960 				   unsigned long ev, void *v)
961 {
962 #ifdef CONFIG_PKVM_DUMP_TRACE_ON_PANIC
963 	if (!hyp_trace_buffer_loaded(&hyp_trace_buffer) ||
964 	    !hyp_trace_buffer.printk_iter)
965 		return NOTIFY_DONE;
966 
967 	if (!strncmp("HYP panic:", v, 10))
968 		hyp_trace_panic = true;
969 
970 	ring_buffer_poll_writer(hyp_trace_buffer.trace_buffer, RING_BUFFER_ALL_CPUS);
971 	hyp_trace_buffer_printk(&hyp_trace_buffer);
972 #endif
973 	return NOTIFY_DONE;
974 }
975 
976 static struct notifier_block hyp_trace_panic_notifier = {
977 	.notifier_call = hyp_trace_panic_handler,
978 	.priority = INT_MAX - 1,
979 };
980 
981 void hyp_trace_enable_event_early(void)
982 {
983 	if (hyp_event_early_probe()) {
984 		int err = hyp_trace_start();
985 
986 		if (err)
987 			pr_warn("Failed to start early event tracing: %d\n", err);
988 	}
989 }
990 
991 int hyp_trace_init_tracefs(void)
992 {
993 	struct dentry *root, *per_cpu_root;
994 	char per_cpu_name[16];
995 	long cpu;
996 
997 	if (!is_protected_kvm_enabled())
998 		return 0;
999 
1000 	root = tracefs_create_dir(TRACEFS_DIR, NULL);
1001 	if (!root) {
1002 		pr_err("Failed to create tracefs "TRACEFS_DIR"/\n");
1003 		return -ENODEV;
1004 	}
1005 
1006 	tracefs_create_file("tracing_on", TRACEFS_MODE_WRITE, root, NULL,
1007 			    &hyp_tracing_on_fops);
1008 
1009 	tracefs_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, root, NULL,
1010 			    &hyp_buffer_size_fops);
1011 
1012 	tracefs_create_file("trace_pipe", TRACEFS_MODE_WRITE, root,
1013 			    (void *)RING_BUFFER_ALL_CPUS, &hyp_trace_pipe_fops);
1014 
1015 	tracefs_create_file("trace", TRACEFS_MODE_WRITE, root,
1016 			    (void *)RING_BUFFER_ALL_CPUS, &hyp_trace_fops);
1017 
1018 	tracefs_create_file("trace_clock", TRACEFS_MODE_READ, root, NULL,
1019 			    &hyp_trace_clock_fops);
1020 
1021 	per_cpu_root = tracefs_create_dir("per_cpu", root);
1022 	if (!per_cpu_root) {
1023 		pr_err("Failed to create tracefs folder "TRACEFS_DIR"/per_cpu/\n");
1024 		return -ENODEV;
1025 	}
1026 
1027 	for_each_possible_cpu(cpu) {
1028 		struct dentry *per_cpu_dir;
1029 
1030 		snprintf(per_cpu_name, sizeof(per_cpu_name), "cpu%ld", cpu);
1031 		per_cpu_dir = tracefs_create_dir(per_cpu_name, per_cpu_root);
1032 		if (!per_cpu_dir) {
1033 			pr_warn("Failed to create tracefs "TRACEFS_DIR"/per_cpu/cpu%ld\n",
1034 				cpu);
1035 			continue;
1036 		}
1037 
1038 		tracefs_create_file("trace_pipe", TRACEFS_MODE_READ, per_cpu_dir,
1039 				    (void *)cpu, &hyp_trace_pipe_fops);
1040 
1041 		tracefs_create_file("trace_pipe_raw", TRACEFS_MODE_READ, per_cpu_dir,
1042 				    (void *)cpu, &hyp_trace_raw_fops);
1043 
1044 		tracefs_create_file("trace", TRACEFS_MODE_WRITE, per_cpu_dir,
1045 				    (void *)cpu, &hyp_trace_fops);
1046 	}
1047 
1048 	hyp_trace_init_event_tracefs(root);
1049 
1050 	hyp_trace_enable_event_early();
1051 
1052 	hyp_trace_init_testing_tracefs(root);
1053 
1054 	if (hyp_trace_buffer.printk_on &&
1055 	    hyp_trace_buffer_printk_init(&hyp_trace_buffer))
1056 		pr_warn("Failed to init ht_printk\n");
1057 
1058 	atomic_notifier_chain_register(&panic_notifier_list, &hyp_trace_panic_notifier);
1059 
1060 	return 0;
1061 }
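/*
 * Resulting tracefs layout (typically mounted at /sys/kernel/tracing/):
 *
 *   hypervisor/{tracing_on,buffer_size_kb,trace_pipe,trace,trace_clock}
 *   hypervisor/per_cpu/cpuN/{trace_pipe,trace_pipe_raw,trace}
 *
 * e.g., assuming that mount point:
 *
 *   echo 16 > /sys/kernel/tracing/hypervisor/buffer_size_kb
 *   echo 1 > /sys/kernel/tracing/hypervisor/tracing_on
 *   cat /sys/kernel/tracing/hypervisor/trace_pipe
 */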
1062