// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2024 Google LLC
 * Author: Vincent Donnefort <vdonnefort@google.com>
 */

#include <linux/arm-smccc.h>
#include <linux/percpu-defs.h>
#include <linux/panic_notifier.h>
#include <linux/trace_events.h>
#include <linux/tracefs.h>

#include <asm/kvm_host.h>
#include <asm/kvm_hyptrace.h>
#include <asm/kvm_hypevents_defs.h>
#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"
#include "hyp_trace.h"

#define RB_POLL_MS 100

/* Same 10min limit used by the clocksource code when the counter is wider than 32-bit */
#define CLOCK_MAX_CONVERSION_S	600
#define CLOCK_INIT_MS		100
#define CLOCK_POLL_MS		500

#define TRACEFS_DIR		"hypervisor"
#define TRACEFS_MODE_WRITE	0640
#define TRACEFS_MODE_READ	0440

struct hyp_trace_clock {
	u64 cycles;
	u64 max_delta;
	u64 boot;
	u32 mult;
	u32 shift;
	struct delayed_work work;
	struct completion ready;
};

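/*
 * Host-side state for the hypervisor trace buffer: the shared descriptor
 * handed over to the hypervisor, the kernel ring-buffer reader attached to
 * it, the reader/writer bookkeeping and the trace clock work.
 */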
static struct hyp_trace_buffer {
	struct hyp_trace_desc *desc;
	struct ring_buffer_writer writer;
	struct trace_buffer *trace_buffer;
	size_t desc_size;
	bool tracing_on;
	int nr_readers;
	struct mutex lock;
	struct hyp_trace_clock clock;
	struct ht_iterator *printk_iter;
	bool printk_on;
} hyp_trace_buffer = {
	.lock = __MUTEX_INITIALIZER(hyp_trace_buffer.lock),
};

static size_t hyp_trace_buffer_size = 7 << 10;

static bool hyp_trace_panic __read_mostly;

/* Number of pages the ring-buffer requires to accommodate the requested size */
#define NR_PAGES(size) \
	((PAGE_ALIGN(size) >> PAGE_SHIFT) + 1)

static inline bool hyp_trace_buffer_loaded(struct hyp_trace_buffer *hyp_buffer)
{
	return !!hyp_buffer->trace_buffer;
}

static inline bool hyp_trace_buffer_used(struct hyp_trace_buffer *hyp_buffer)
{
	return hyp_buffer->nr_readers || hyp_buffer->tracing_on ||
	       !ring_buffer_empty(hyp_buffer->trace_buffer);
}

static int set_ht_printk_on(char *str)
{
	if (strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)
		hyp_trace_buffer.printk_on = true;

	return 1;
}
__setup("hyp_trace_printk", set_ht_printk_on);

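/*
 * Periodically correlate the arch counter with the kernel boot clock:
 * measure the counter rate, derive a mult/shift pair and push it to the
 * hypervisor so its trace timestamps stay aligned with the host clock.
 */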
static void __hyp_clock_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct hyp_trace_buffer *hyp_buffer;
	struct hyp_trace_clock *hyp_clock;
	struct system_time_snapshot snap;
	u64 rate, delta_cycles;
	u64 boot, delta_boot;
	u64 err = 0;

	hyp_clock = container_of(dwork, struct hyp_trace_clock, work);
	hyp_buffer = container_of(hyp_clock, struct hyp_trace_buffer, clock);

	ktime_get_snapshot(&snap);
	boot = ktime_to_ns(snap.boot);

	delta_boot = boot - hyp_clock->boot;
	delta_cycles = snap.cycles - hyp_clock->cycles;

	/* Compare hyp clock with the kernel boot clock */
	if (hyp_clock->mult) {
		u64 cur = delta_cycles;

		cur *= hyp_clock->mult;
		cur >>= hyp_clock->shift;
		cur += hyp_clock->boot;

		err = abs_diff(cur, boot);

		/* No deviation, only update epoch if necessary */
		if (!err) {
			if (delta_cycles >= hyp_clock->max_delta)
				goto update_hyp;

			goto resched;
		}

		/* Warn if the error is above tracing precision (1us) */
		if (hyp_buffer->tracing_on && err > NSEC_PER_USEC)
			pr_warn_ratelimited("hyp trace clock off by %lluus\n",
					    err / NSEC_PER_USEC);
	}

	rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);
	clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
			       rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);

update_hyp:
	hyp_clock->max_delta = (U64_MAX / hyp_clock->mult) >> 1;
	hyp_clock->cycles = snap.cycles;
	hyp_clock->boot = boot;
	kvm_call_hyp_nvhe(__pkvm_update_clock_tracing, hyp_clock->mult,
			  hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
	complete(&hyp_clock->ready);

	pr_debug("hyp trace clock update mult=%u shift=%u max_delta=%llu err=%llu\n",
		 hyp_clock->mult, hyp_clock->shift, hyp_clock->max_delta, err);

resched:
	schedule_delayed_work(&hyp_clock->work,
			      msecs_to_jiffies(CLOCK_POLL_MS));
}

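/*
 * Take the initial counter/boot-clock snapshot and kick off the periodic
 * clock-correlation work. hyp_clock_wait() blocks until the first mult/shift
 * pair has been pushed to the hypervisor.
 */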
static void hyp_clock_start(struct hyp_trace_buffer *hyp_buffer)
{
	struct hyp_trace_clock *hyp_clock = &hyp_buffer->clock;
	struct system_time_snapshot snap;

	ktime_get_snapshot(&snap);

	hyp_clock->boot = ktime_to_ns(snap.boot);
	hyp_clock->cycles = snap.cycles;
	hyp_clock->mult = 0;

	init_completion(&hyp_clock->ready);
	INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
	schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
}

static void hyp_clock_stop(struct hyp_trace_buffer *hyp_buffer)
{
	struct hyp_trace_clock *hyp_clock = &hyp_buffer->clock;

	cancel_delayed_work_sync(&hyp_clock->work);
}

static void hyp_clock_wait(struct hyp_trace_buffer *hyp_buffer)
{
	struct hyp_trace_clock *hyp_clock = &hyp_buffer->clock;

	wait_for_completion(&hyp_clock->ready);
}

static int __get_reader_page(int cpu)
{
	/* We'd better not try to call the hyp if it has panicked */
	if (hyp_trace_panic)
		return 0;

	return kvm_call_hyp_nvhe(__pkvm_swap_reader_tracing, cpu);
}

static int __reset(int cpu)
{
	return kvm_call_hyp_nvhe(__pkvm_reset_tracing, cpu);
}

static void hyp_trace_free_pages(struct hyp_trace_desc *desc)
{
	struct rb_page_desc *rb_desc;
	int cpu, id;

	for_each_rb_page_desc(rb_desc, cpu, &desc->page_desc) {
		free_page(rb_desc->meta_va);
		for (id = 0; id < rb_desc->nr_page_va; id++)
			free_page(rb_desc->page_va[id]);
	}
}

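/*
 * Allocate, for each possible CPU, one meta page plus NR_PAGES(size) data
 * pages and record their virtual addresses in the descriptor. On failure,
 * everything allocated so far is freed.
 */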
static int hyp_trace_alloc_pages(struct hyp_trace_desc *desc, size_t size)
{
	int err = 0, cpu, id, nr_pages = NR_PAGES(size);
	struct trace_page_desc *trace_desc;
	struct rb_page_desc *rb_desc;

	trace_desc = &desc->page_desc;
	trace_desc->nr_cpus = 0;

	rb_desc = (struct rb_page_desc *)&trace_desc->__data[0];

	for_each_possible_cpu(cpu) {
		rb_desc->cpu = cpu;
		rb_desc->nr_page_va = 0;
		rb_desc->meta_va = (unsigned long)page_to_virt(alloc_page(GFP_KERNEL));
		if (!rb_desc->meta_va) {
			err = -ENOMEM;
			break;
		}
		for (id = 0; id < nr_pages; id++) {
			rb_desc->page_va[id] = (unsigned long)page_to_virt(alloc_page(GFP_KERNEL));
			if (!rb_desc->page_va[id]) {
				err = -ENOMEM;
				break;
			}
			rb_desc->nr_page_va++;
		}
		trace_desc->nr_cpus++;
		rb_desc = __next_rb_page_desc(rb_desc);
	}

	if (err) {
		hyp_trace_free_pages(desc);
		return err;
	}

	return 0;
}

static int __load_page(unsigned long va)
{
	return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn((void *)va), 1);
}

static void __teardown_page(unsigned long va)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, virt_to_pfn((void *)va), 1));
}

static void hyp_trace_teardown_pages(struct hyp_trace_desc *desc,
				     int last_cpu)
{
	struct rb_page_desc *rb_desc;
	int cpu, id;

	for_each_rb_page_desc(rb_desc, cpu, &desc->page_desc) {
		if (cpu > last_cpu)
			break;
		__teardown_page(rb_desc->meta_va);
		for (id = 0; id < rb_desc->nr_page_va; id++)
			__teardown_page(rb_desc->page_va[id]);
	}
}

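/*
 * Share every meta and data page with the hypervisor. If sharing fails
 * part-way, unshare what was already handed over before returning the error.
 */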
static int hyp_trace_load_pages(struct hyp_trace_desc *desc)
{
	int last_loaded_cpu = 0, cpu, id, err = -EINVAL;
	struct rb_page_desc *rb_desc;

	for_each_rb_page_desc(rb_desc, cpu, &desc->page_desc) {
		err = __load_page(rb_desc->meta_va);
		if (err)
			break;

		for (id = 0; id < rb_desc->nr_page_va; id++) {
			err = __load_page(rb_desc->page_va[id]);
			if (err)
				break;
		}

		if (!err)
			continue;

		for (id--; id >= 0; id--)
			__teardown_page(rb_desc->page_va[id]);

		last_loaded_cpu = cpu - 1;

		break;
	}

	if (!err)
		return 0;

	hyp_trace_teardown_pages(desc, last_loaded_cpu);

	return err;
}

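/*
 * Compute the descriptor size for all possible CPUs, allocate it with page
 * granularity (the hypervisor unmaps it from the host while reading it),
 * populate and share the ring-buffer pages, then hand the descriptor over
 * with __pkvm_load_tracing and attach a kernel-side ring-buffer reader.
 */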
static int hyp_trace_buffer_load(struct hyp_trace_buffer *hyp_buffer, size_t size)
{
	int ret, nr_pages = NR_PAGES(size);
	struct rb_page_desc *rbdesc;
	struct hyp_trace_desc *desc;
	size_t desc_size;

	if (hyp_trace_buffer_loaded(hyp_buffer))
		return 0;

	desc_size = size_add(offsetof(struct hyp_trace_desc, page_desc),
			     offsetof(struct trace_page_desc, __data));
	desc_size = size_add(desc_size,
			     size_mul(num_possible_cpus(),
				      struct_size(rbdesc, page_va, nr_pages)));
	if (desc_size == SIZE_MAX)
		return -E2BIG;

	/*
	 * The hypervisor will unmap the descriptor from the host to protect
	 * the reading. Page granularity for the allocation ensures no other
	 * useful data will be unmapped.
	 */
	desc_size = PAGE_ALIGN(desc_size);

	desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	ret = hyp_trace_alloc_pages(desc, size);
	if (ret)
		goto err_free_desc;

	ret = hyp_trace_load_pages(desc);
	if (ret)
		goto err_free_pages;

	ret = kvm_call_refill_hyp_nvhe(__pkvm_load_tracing, (unsigned long)desc,
				       desc_size);
	if (ret)
		goto err_teardown_pages;

	hyp_buffer->writer.pdesc = &desc->page_desc;
	hyp_buffer->writer.get_reader_page = __get_reader_page;
	hyp_buffer->writer.reset = __reset;
	hyp_buffer->trace_buffer = ring_buffer_reader(&hyp_buffer->writer);
	if (!hyp_buffer->trace_buffer) {
		ret = -ENOMEM;
		goto err_teardown_tracing;
	}

	hyp_buffer->desc = desc;
	hyp_buffer->desc_size = desc_size;

	return 0;

err_teardown_tracing:
	kvm_call_hyp_nvhe(__pkvm_teardown_tracing);
err_teardown_pages:
	hyp_trace_teardown_pages(desc, INT_MAX);
err_free_pages:
	hyp_trace_free_pages(desc);
err_free_desc:
	free_pages_exact(desc, desc_size);

	return ret;
}

static void hyp_trace_buffer_teardown(struct hyp_trace_buffer *hyp_buffer)
{
	struct hyp_trace_desc *desc = hyp_buffer->desc;
	size_t desc_size = hyp_buffer->desc_size;

	if (!hyp_trace_buffer_loaded(hyp_buffer))
		return;

	if (hyp_trace_buffer_used(hyp_buffer))
		return;

	if (kvm_call_hyp_nvhe(__pkvm_teardown_tracing))
		return;

	ring_buffer_free(hyp_buffer->trace_buffer);
	hyp_trace_teardown_pages(desc, INT_MAX);
	hyp_trace_free_pages(desc);
	free_pages_exact(desc, desc_size);
	hyp_buffer->trace_buffer = NULL;
}

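/*
 * Writing 1 to hypervisor/tracing_on loads the buffer (if needed), waits for
 * a valid trace clock and asks the hypervisor to start emitting events.
 * Writing 0 stops tracing and tears the buffer down once it is unused.
 */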
static int hyp_trace_start(void)
{
	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
	int ret = 0;

	mutex_lock(&hyp_buffer->lock);

	if (hyp_buffer->tracing_on)
		goto out;

	hyp_clock_start(hyp_buffer);

	ret = hyp_trace_buffer_load(hyp_buffer, hyp_trace_buffer_size);
	if (ret)
		goto out;

	hyp_clock_wait(hyp_buffer);

	ret = kvm_call_hyp_nvhe(__pkvm_enable_tracing, true);
	if (ret) {
		hyp_trace_buffer_teardown(hyp_buffer);
		goto out;
	}

	hyp_buffer->tracing_on = true;

out:
	if (!hyp_buffer->tracing_on)
		hyp_clock_stop(hyp_buffer);

	mutex_unlock(&hyp_buffer->lock);

	return ret;
}

static void hyp_trace_stop(void)
{
	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
	int ret;

	mutex_lock(&hyp_buffer->lock);

	if (!hyp_buffer->tracing_on)
		goto end;

	ret = kvm_call_hyp_nvhe(__pkvm_enable_tracing, false);
	if (!ret) {
		hyp_clock_stop(hyp_buffer);
		ring_buffer_poll_writer(hyp_buffer->trace_buffer,
					RING_BUFFER_ALL_CPUS);
		hyp_buffer->tracing_on = false;
		hyp_trace_buffer_teardown(hyp_buffer);
	}

end:
	mutex_unlock(&hyp_buffer->lock);
}

static ssize_t hyp_tracing_on(struct file *filp, const char __user *ubuf,
			      size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val)
		ret = hyp_trace_start();
	else
		hyp_trace_stop();

	return ret ? ret : cnt;
}

static ssize_t hyp_tracing_on_read(struct file *filp, char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	char buf[3];
	int r;

	mutex_lock(&hyp_trace_buffer.lock);
	r = sprintf(buf, "%d\n", hyp_trace_buffer.tracing_on);
	mutex_unlock(&hyp_trace_buffer.lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static const struct file_operations hyp_tracing_on_fops = {
	.write = hyp_tracing_on,
	.read = hyp_tracing_on_read,
};

static ssize_t hyp_buffer_size(struct file *filp, const char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (!val)
		return -EINVAL;

	mutex_lock(&hyp_trace_buffer.lock);
	hyp_trace_buffer_size = val << 10; /* KB to B */
	mutex_unlock(&hyp_trace_buffer.lock);

	return cnt;
}

static ssize_t hyp_buffer_size_read(struct file *filp, char __user *ubuf,
				    size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	mutex_lock(&hyp_trace_buffer.lock);
	r = sprintf(buf, "%lu (%s)\n", hyp_trace_buffer_size >> 10,
		    hyp_trace_buffer_loaded(&hyp_trace_buffer) ?
		    "loaded" : "unloaded");
	mutex_unlock(&hyp_trace_buffer.lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static const struct file_operations hyp_buffer_size_fops = {
	.write = hyp_buffer_size,
	.read = hyp_buffer_size_read,
};

static void ht_print_trace_time(struct ht_iterator *iter)
{
	unsigned long usecs_rem;
	u64 ts_ns = iter->ts;

	do_div(ts_ns, 1000);
	usecs_rem = do_div(ts_ns, USEC_PER_SEC);

	trace_seq_printf(&iter->seq, "%5lu.%06lu: ",
			 (unsigned long)ts_ns, usecs_rem);
}

static void ht_print_trace_cpu(struct ht_iterator *iter)
{
	trace_seq_printf(&iter->seq, "[%03d]\t", iter->ent_cpu);
}

static int ht_print_trace_fmt(struct ht_iterator *iter)
{
	struct hyp_event *e;

	if (iter->lost_events)
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->ent_cpu, iter->lost_events);

	ht_print_trace_cpu(iter);
	ht_print_trace_time(iter);

	e = hyp_trace_find_event(iter->ent->id);
	if (e)
		e->trace_func(iter);
	else
		trace_seq_printf(&iter->seq, "Unknown event id %d\n", iter->ent->id);

	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
}

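/*
 * Peek at the next event to output: either the oldest event on the requested
 * CPU, or, for RING_BUFFER_ALL_CPUS, the oldest event across all CPUs.
 */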
static struct ring_buffer_event *__ht_next_pipe_event(struct ht_iterator *iter)
{
	struct trace_buffer *trace_buffer = iter->hyp_buffer->trace_buffer;
	struct ring_buffer_event *evt = NULL;
	int cpu = iter->cpu;

	if (cpu != RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			return NULL;

		iter->ent_cpu = cpu;

		return ring_buffer_peek(trace_buffer, cpu, &iter->ts,
					&iter->lost_events);
	}

	iter->ts = LLONG_MAX;
	for_each_possible_cpu(cpu) {
		struct ring_buffer_event *_evt;
		unsigned long lost_events;
		u64 ts;

		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			continue;

		_evt = ring_buffer_peek(trace_buffer, cpu, &ts,
					&lost_events);
		if (!_evt)
			continue;

		if (ts >= iter->ts)
			continue;

		iter->ts = ts;
		iter->ent_cpu = cpu;
		iter->lost_events = lost_events;
		evt = _evt;
	}

	return evt;
}

static void *ht_next_pipe_event(struct ht_iterator *iter)
{
	struct ring_buffer_event *event;

	event = __ht_next_pipe_event(iter);
	if (!event)
		return NULL;

	iter->ent = (struct hyp_entry_hdr *)&event->array[1];
	iter->ent_size = event->array[0];

	return iter;
}

static ssize_t
hyp_trace_pipe_read(struct file *file, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	struct ht_iterator *iter = (struct ht_iterator *)file->private_data;
	struct trace_buffer *trace_buffer = iter->hyp_buffer->trace_buffer;
	int ret;

copy_to_user:
	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (ret != -EBUSY)
		return ret;

	trace_seq_init(&iter->seq);

	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
	if (ret < 0)
		return ret;

	while (ht_next_pipe_event(iter)) {
		int prev_len = iter->seq.seq.len;

		if (ht_print_trace_fmt(iter)) {
			iter->seq.seq.len = prev_len;
			break;
		}

		ring_buffer_consume(trace_buffer, iter->ent_cpu, NULL, NULL);
	}

	goto copy_to_user;
}

static void hyp_trace_buffer_printk(struct hyp_trace_buffer *hyp_buffer);

static void __poll_writer(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct ht_iterator *iter;

	iter = container_of(dwork, struct ht_iterator, poll_work);

	ring_buffer_poll_writer(iter->hyp_buffer->trace_buffer, iter->cpu);

	hyp_trace_buffer_printk(iter->hyp_buffer);

	schedule_delayed_work((struct delayed_work *)work,
			      msecs_to_jiffies(RB_POLL_MS));
}

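/*
 * Create a reader iterator: load the buffer if needed, start the periodic
 * writer-poll work and account for the new reader. Called with
 * hyp_buffer->lock held.
 */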
static struct ht_iterator *
ht_iterator_create(struct hyp_trace_buffer *hyp_buffer, int cpu)
{
	struct ht_iterator *iter = NULL;
	int ret;

	WARN_ON(!mutex_is_locked(&hyp_buffer->lock));

	if (hyp_buffer->nr_readers == INT_MAX) {
		ret = -EBUSY;
		goto unlock;
	}

	ret = hyp_trace_buffer_load(hyp_buffer, hyp_trace_buffer_size);
	if (ret)
		goto unlock;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		ret = -ENOMEM;
		goto unlock;
	}
	iter->hyp_buffer = hyp_buffer;
	iter->cpu = cpu;
	trace_seq_init(&iter->seq);

	ret = ring_buffer_poll_writer(hyp_buffer->trace_buffer, cpu);
	if (ret)
		goto unlock;

	INIT_DELAYED_WORK(&iter->poll_work, __poll_writer);
	schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(RB_POLL_MS));

	hyp_buffer->nr_readers++;

unlock:
	if (ret) {
		hyp_trace_buffer_teardown(hyp_buffer);
		kfree(iter);
		iter = NULL;
	}

	return iter;
}

static int hyp_trace_pipe_open(struct inode *inode, struct file *file)
{
	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
	int cpu = (s64)inode->i_private;

	mutex_lock(&hyp_buffer->lock);

	file->private_data = ht_iterator_create(hyp_buffer, cpu);

	mutex_unlock(&hyp_buffer->lock);

	return file->private_data ? 0 : -EINVAL;
}

static int hyp_trace_pipe_release(struct inode *inode, struct file *file)
{
	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;
	struct ht_iterator *iter = file->private_data;

	cancel_delayed_work_sync(&iter->poll_work);

	mutex_lock(&hyp_buffer->lock);

	WARN_ON(--hyp_buffer->nr_readers < 0);

	hyp_trace_buffer_teardown(hyp_buffer);

	mutex_unlock(&hyp_buffer->lock);

	kfree(iter);

	return 0;
}

static const struct file_operations hyp_trace_pipe_fops = {
	.open = hyp_trace_pipe_open,
	.read = hyp_trace_pipe_read,
	.release = hyp_trace_pipe_release,
};

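/*
 * Binary (trace_pipe_raw) read path: consume the buffer one page at a time
 * into a spare page and copy it out, keeping track of any leftover bytes the
 * user buffer could not hold.
 */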
static ssize_t
hyp_trace_raw_read(struct file *file, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	struct ht_iterator *iter = (struct ht_iterator *)file->private_data;
	size_t size;
	int ret;
	void *page_data;

	if (iter->copy_leftover)
		goto read;

again:
	ret = ring_buffer_read_page(iter->hyp_buffer->trace_buffer,
				    (struct buffer_data_read_page *)iter->spare,
				    cnt, iter->cpu, 0);
	if (ret < 0) {
		if (!ring_buffer_empty_cpu(iter->hyp_buffer->trace_buffer,
					   iter->cpu))
			return 0;

		ret = ring_buffer_wait(iter->hyp_buffer->trace_buffer,
				       iter->cpu, 0, NULL, NULL);
		if (ret < 0)
			return ret;

		goto again;
	}

	iter->copy_leftover = 0;

read:
	size = PAGE_SIZE - iter->copy_leftover;
	if (size > cnt)
		size = cnt;

	page_data = ring_buffer_read_page_data(
			(struct buffer_data_read_page *)iter->spare);
	ret = copy_to_user(ubuf, page_data + PAGE_SIZE - size, size);
	if (ret == size)
		return -EFAULT;

	size -= ret;
	*ppos += size;
	iter->copy_leftover = ret;

	return size;
}

static int hyp_trace_raw_open(struct inode *inode, struct file *file)
{
	int ret = hyp_trace_pipe_open(inode, file);
	struct ht_iterator *iter;

	if (ret)
		return ret;

	iter = file->private_data;
	iter->spare = ring_buffer_alloc_read_page(iter->hyp_buffer->trace_buffer,
						  iter->cpu);
	if (IS_ERR(iter->spare)) {
		ret = PTR_ERR(iter->spare);
		iter->spare = NULL;
		return ret;
	}

	return 0;
}

static int hyp_trace_raw_release(struct inode *inode, struct file *file)
{
	struct ht_iterator *iter = file->private_data;

	ring_buffer_free_read_page(iter->hyp_buffer->trace_buffer, iter->cpu,
				   iter->spare);

	return hyp_trace_pipe_release(inode, file);
}

static const struct file_operations hyp_trace_raw_fops = {
	.open = hyp_trace_raw_open,
	.read = hyp_trace_raw_read,
	.release = hyp_trace_raw_release,
};

static void hyp_trace_reset(int cpu)
{
	struct hyp_trace_buffer *hyp_buffer = &hyp_trace_buffer;

	mutex_lock(&hyp_buffer->lock);

	if (!hyp_trace_buffer_loaded(hyp_buffer))
		goto out;

	if (cpu == RING_BUFFER_ALL_CPUS)
		ring_buffer_reset(hyp_buffer->trace_buffer);
	else
		ring_buffer_reset_cpu(hyp_buffer->trace_buffer, cpu);

out:
	mutex_unlock(&hyp_buffer->lock);
}

static int hyp_trace_open(struct inode *inode, struct file *file)
{
	int cpu = (s64)inode->i_private;

	if (file->f_mode & FMODE_WRITE)
		hyp_trace_reset(cpu);

	return 0;
}

static ssize_t hyp_trace_write(struct file *filp, const char __user *ubuf,
			       size_t count, loff_t *ppos)
{
	return count;
}

static const struct file_operations hyp_trace_fops = {
	.open = hyp_trace_open,
	.write = hyp_trace_write,
	.release = NULL,
};

static int hyp_trace_clock_show(struct seq_file *m, void *v)
{
	seq_puts(m, "[boot]\n");

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock);

#ifdef CONFIG_PKVM_SELFTESTS
static int selftest_event_open(struct inode *inode, struct file *file)
{
	if (file->f_mode & FMODE_WRITE)
		return kvm_call_hyp_nvhe(__pkvm_selftest_event);

	return 0;
}

static ssize_t selftest_event_write(struct file *f, const char __user *buf,
				    size_t cnt, loff_t *pos)
{
	return cnt;
}

static const struct file_operations selftest_event_fops = {
	.open = selftest_event_open,
	.write = selftest_event_write,
};

static void hyp_trace_init_testing_tracefs(struct dentry *root)
{
	tracefs_create_file("selftest_event", TRACEFS_MODE_WRITE, root, NULL,
			    &selftest_event_fops);
}
#else
static void hyp_trace_init_testing_tracefs(struct dentry *root) { }
#endif

static int hyp_trace_buffer_printk_init(struct hyp_trace_buffer *hyp_buffer)
{
	int ret = 0;

	mutex_lock(&hyp_buffer->lock);

	if (hyp_buffer->printk_iter)
		goto unlock;

	hyp_buffer->printk_iter = ht_iterator_create(hyp_buffer,
						     RING_BUFFER_ALL_CPUS);
	if (!hyp_buffer->printk_iter)
		ret = -EINVAL;
unlock:
	mutex_unlock(&hyp_buffer->lock);

	return ret;
}

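/*
 * Drain the hypervisor buffer through printk. Used when booting with
 * hyp_trace_printk on the command line and, when
 * CONFIG_PKVM_DUMP_TRACE_ON_PANIC is set, to dump the trace on panic.
 */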
static void hyp_trace_buffer_printk(struct hyp_trace_buffer *hyp_buffer)
{
	struct ht_iterator *ht_iter = hyp_buffer->printk_iter;

	if (!hyp_trace_buffer.printk_on)
		return;

	trace_seq_init(&ht_iter->seq);
	while (ht_next_pipe_event(ht_iter)) {
		ht_print_trace_fmt(ht_iter);

		/* Nothing has been written in the seq_buf */
		if (!ht_iter->seq.seq.len)
			return;

		ht_iter->seq.buffer[ht_iter->seq.seq.len] = '\0';
		printk("%s", ht_iter->seq.buffer);

		ht_iter->seq.seq.len = 0;
		ring_buffer_consume(hyp_buffer->trace_buffer, ht_iter->ent_cpu,
				    NULL, NULL);
	}
}

static int hyp_trace_panic_handler(struct notifier_block *self,
				   unsigned long ev, void *v)
{
#ifdef CONFIG_PKVM_DUMP_TRACE_ON_PANIC
	if (!hyp_trace_buffer_loaded(&hyp_trace_buffer) ||
	    !hyp_trace_buffer.printk_iter)
		return NOTIFY_DONE;

	if (!strncmp("HYP panic:", v, 10))
		hyp_trace_panic = true;

	ring_buffer_poll_writer(hyp_trace_buffer.trace_buffer, RING_BUFFER_ALL_CPUS);
	hyp_trace_buffer_printk(&hyp_trace_buffer);
#endif
	return NOTIFY_DONE;
}

static struct notifier_block hyp_trace_panic_notifier = {
	.notifier_call = hyp_trace_panic_handler,
	.priority = INT_MAX - 1,
};

void hyp_trace_enable_event_early(void)
{
	if (hyp_event_early_probe()) {
		int err = hyp_trace_start();

		if (err)
			pr_warn("Failed to start early events tracing: %d\n", err);
	}
}

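/*
 * Create the tracefs hierarchy for hypervisor tracing. Assuming tracefs is
 * mounted at its usual /sys/kernel/tracing location, the interface mirrors
 * the regular tracing one, e.g.:
 *
 *   echo 1 > /sys/kernel/tracing/hypervisor/tracing_on
 *   cat /sys/kernel/tracing/hypervisor/trace_pipe
 *   cat /sys/kernel/tracing/hypervisor/per_cpu/cpu0/trace_pipe
 */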
int hyp_trace_init_tracefs(void)
{
	struct dentry *root, *per_cpu_root;
	char per_cpu_name[16];
	long cpu;

	if (!is_protected_kvm_enabled())
		return 0;

	root = tracefs_create_dir(TRACEFS_DIR, NULL);
	if (!root) {
		pr_err("Failed to create tracefs "TRACEFS_DIR"/\n");
		return -ENODEV;
	}

	tracefs_create_file("tracing_on", TRACEFS_MODE_WRITE, root, NULL,
			    &hyp_tracing_on_fops);

	tracefs_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, root, NULL,
			    &hyp_buffer_size_fops);

	tracefs_create_file("trace_pipe", TRACEFS_MODE_WRITE, root,
			    (void *)RING_BUFFER_ALL_CPUS, &hyp_trace_pipe_fops);

	tracefs_create_file("trace", TRACEFS_MODE_WRITE, root,
			    (void *)RING_BUFFER_ALL_CPUS, &hyp_trace_fops);

	tracefs_create_file("trace_clock", TRACEFS_MODE_READ, root, NULL,
			    &hyp_trace_clock_fops);

	per_cpu_root = tracefs_create_dir("per_cpu", root);
	if (!per_cpu_root) {
		pr_err("Failed to create tracefs folder "TRACEFS_DIR"/per_cpu/\n");
		return -ENODEV;
	}

	for_each_possible_cpu(cpu) {
		struct dentry *per_cpu_dir;

		snprintf(per_cpu_name, sizeof(per_cpu_name), "cpu%ld", cpu);
		per_cpu_dir = tracefs_create_dir(per_cpu_name, per_cpu_root);
		if (!per_cpu_dir) {
			pr_warn("Failed to create tracefs "TRACEFS_DIR"/per_cpu/cpu%ld\n",
				cpu);
			continue;
		}

		tracefs_create_file("trace_pipe", TRACEFS_MODE_READ, per_cpu_dir,
				    (void *)cpu, &hyp_trace_pipe_fops);

		tracefs_create_file("trace_pipe_raw", TRACEFS_MODE_READ, per_cpu_dir,
				    (void *)cpu, &hyp_trace_raw_fops);

		tracefs_create_file("trace", TRACEFS_MODE_WRITE, per_cpu_dir,
				    (void *)cpu, &hyp_trace_fops);
	}

	hyp_trace_init_event_tracefs(root);

	hyp_trace_enable_event_early();

	hyp_trace_init_testing_tracefs(root);

	if (hyp_trace_buffer.printk_on &&
	    hyp_trace_buffer_printk_init(&hyp_trace_buffer))
		pr_warn("Failed to init ht_printk\n");

	atomic_notifier_chain_register(&panic_notifier_list, &hyp_trace_panic_notifier);

	return 0;
}