// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2023 Google LLC
 * Author: Vincent Donnefort <vdonnefort@google.com>
 */

#include <nvhe/alloc.h>
#include <nvhe/clock.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trace/trace.h>

#include <asm/percpu.h>
#include <asm/kvm_mmu.h>
#include <asm/local.h>

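/*
 * Ring-buffer page flags, stored in the low bits of a hyp_buffer_page's
 * ->list.next pointer, in the spirit of the kernel ring-buffer's lockless
 * head-page marking scheme.
 */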
#define HYP_RB_PAGE_HEAD	1UL
#define HYP_RB_PAGE_UPDATE	2UL
#define HYP_RB_FLAG_MASK	3UL

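/*
 * Hypervisor-side bookkeeping for one data page of the ring buffer. The page
 * itself (->page) is shared with the host; ->write and ->entries track the
 * write offset and the number of events on that page.
 */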
struct hyp_buffer_page {
	struct list_head list;
	struct buffer_data_page *page;
	unsigned long write;
	unsigned long entries;
	u32 id;
};

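/*
 * Per-CPU ring-buffer state. ->meta points to the meta page shared with the
 * host reader, while ->reader_page, ->head_page and ->tail_page track the
 * pages involved in the reader-swap and writer paths.
 */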
struct hyp_rb_per_cpu {
	struct ring_buffer_meta *meta;
	struct hyp_buffer_page *tail_page;
	struct hyp_buffer_page *reader_page;
	struct hyp_buffer_page *head_page;
	struct hyp_buffer_page *bpages;
	unsigned long nr_pages;
	unsigned long last_overrun;
	u64 write_stamp;
	atomic_t status;
};

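/*
 * Writer status: the buffer is either not loaded/disabled (UNAVAILABLE),
 * ready to accept events (READY), or currently being written to (WRITING).
 * Transitions use cmpxchg so that disabling can wait for an in-flight writer
 * to finish.
 */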
#define HYP_RB_UNAVAILABLE	0
#define HYP_RB_READY		1
#define HYP_RB_WRITING		2

DEFINE_PER_CPU(struct hyp_rb_per_cpu, trace_rb);
DEFINE_HYP_SPINLOCK(trace_rb_lock);

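/*
 * Set the page flag encoded in the low bits of ->list.next. Returns false if
 * the link was concurrently modified, e.g. by the reader swapping the head
 * page.
 */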
static bool rb_set_flag(struct hyp_buffer_page *bpage, int new_flag)
{
	unsigned long ret, val = (unsigned long)bpage->list.next;

	ret = cmpxchg((unsigned long *)&bpage->list.next,
		      val, (val & ~HYP_RB_FLAG_MASK) | new_flag);

	return ret == val;
}

static struct hyp_buffer_page *rb_hyp_buffer_page(struct list_head *list)
{
	unsigned long ptr = (unsigned long)list & ~HYP_RB_FLAG_MASK;

	return container_of((struct list_head *)ptr, struct hyp_buffer_page, list);
}

static struct hyp_buffer_page *rb_next_page(struct hyp_buffer_page *bpage)
{
	return rb_hyp_buffer_page(bpage->list.next);
}

static bool rb_is_head_page(struct hyp_buffer_page *bpage)
{
	return (unsigned long)bpage->list.prev->next & HYP_RB_PAGE_HEAD;
}

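/*
 * Walk the ring until the page carrying the HEAD flag is found and cache it
 * in cpu_buffer->head_page. Returns NULL if no head page could be found after
 * a few attempts (the reader may be moving it concurrently).
 */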
static struct hyp_buffer_page *rb_set_head_page(struct hyp_rb_per_cpu *cpu_buffer)
{
	struct hyp_buffer_page *bpage, *prev_head;
	int cnt = 0;
again:
	bpage = prev_head = cpu_buffer->head_page;
	do {
		if (rb_is_head_page(bpage)) {
			cpu_buffer->head_page = bpage;
			return bpage;
		}

		bpage = rb_next_page(bpage);
	} while (bpage != prev_head);

	/* We might have raced with the writer, let's try again */
	if (++cnt < 3)
		goto again;

	return NULL;
}

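/*
 * Swap the reader page with the current head page so the host can consume it
 * without racing with the writer: link the reader page in place of the head,
 * then atomically redirect the previous page's ->next link to it.
 */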
static int rb_swap_reader_page(struct hyp_rb_per_cpu *cpu_buffer)
{
	unsigned long *old_head_link, old_link_val, new_link_val, overrun;
	struct hyp_buffer_page *head, *reader = cpu_buffer->reader_page;
spin:
	/* Update cpu_buffer->head_page according to HYP_RB_PAGE_HEAD */
	head = rb_set_head_page(cpu_buffer);
	if (!head)
		return -ENODEV;

	/* Connect the reader page around the head page */
	reader->list.next = head->list.next;
	reader->list.prev = head->list.prev;

	/* The reader page points to the new head page */
	rb_set_flag(reader, HYP_RB_PAGE_HEAD);

	/*
	 * Paired with the cmpxchg in rb_move_tail(). Order the read of the head
	 * page and overrun.
	 */
	smp_mb();
	overrun = READ_ONCE(cpu_buffer->meta->overrun);

	/* Try to swap the prev head link to the reader page */
	old_head_link = (unsigned long *)&reader->list.prev->next;
	old_link_val = (*old_head_link & ~HYP_RB_FLAG_MASK) | HYP_RB_PAGE_HEAD;
	new_link_val = (unsigned long)&reader->list;
	if (cmpxchg(old_head_link, old_link_val, new_link_val) != old_link_val)
		goto spin;

	cpu_buffer->head_page = rb_hyp_buffer_page(reader->list.next);
	cpu_buffer->head_page->list.prev = &reader->list;
	cpu_buffer->reader_page = head;
	cpu_buffer->meta->reader_page.lost_events = overrun - cpu_buffer->last_overrun;
	cpu_buffer->meta->reader_page.id = cpu_buffer->reader_page->id;
	cpu_buffer->last_overrun = overrun;

	return 0;
}

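/*
 * Move the writer to the next page once the current tail page is full. If the
 * next page is the head page, the writer overwrites it: account the lost
 * events/pages in the meta page and push the HEAD flag one page forward.
 */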
static struct hyp_buffer_page *rb_move_tail(struct hyp_rb_per_cpu *cpu_buffer)
{
	struct hyp_buffer_page *tail_page, *new_tail, *new_head;

	tail_page = cpu_buffer->tail_page;
	new_tail = rb_next_page(tail_page);
again:
	/*
	 * We caught the reader ... Let's try to move the head page.
	 * The writer can only rely on ->next links to check if this is head.
	 */
	if ((unsigned long)tail_page->list.next & HYP_RB_PAGE_HEAD) {
		/* The reader moved the head in between */
		if (!rb_set_flag(tail_page, HYP_RB_PAGE_UPDATE))
			goto again;

		WRITE_ONCE(cpu_buffer->meta->overrun,
			   cpu_buffer->meta->overrun + new_tail->entries);
		WRITE_ONCE(cpu_buffer->meta->pages_lost,
			   cpu_buffer->meta->pages_lost + 1);

		/* Move the head */
		rb_set_flag(new_tail, HYP_RB_PAGE_HEAD);

		/* The new head is in place, reset the update flag */
		rb_set_flag(tail_page, 0);

		new_head = rb_next_page(new_tail);
	}

	local_set(&new_tail->page->commit, 0);

	new_tail->write = 0;
	new_tail->entries = 0;

	WRITE_ONCE(cpu_buffer->meta->pages_touched,
		   cpu_buffer->meta->pages_touched + 1);
	cpu_buffer->tail_page = new_tail;

	return new_tail;
}

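/*
 * Total size of an event: the payload, the event header, plus one array slot
 * used to store the payload length (events are written with type_len == 0,
 * i.e. the length lives in array[0]).
 */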
unsigned long rb_event_size(unsigned long length)
{
	struct ring_buffer_event *event;

	return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
}

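/*
 * Emit a TIME_EXTEND event in front of the data event when the timestamp
 * delta does not fit in the event header. Returns the location of the data
 * event, right after the 8-byte extend event.
 */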
static struct ring_buffer_event *
rb_add_ts_extend(struct ring_buffer_event *event, u64 delta)
{
	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
	event->time_delta = delta & TS_MASK;
	event->array[0] = delta >> TS_SHIFT;

	return (struct ring_buffer_event *)((unsigned long)event + 8);
}

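/*
 * Reserve space for one event on the current tail page, moving the tail to
 * the next page if it would overflow BUF_PAGE_SIZE. The first event of a page
 * carries an absolute timestamp; subsequent events store a delta, prefixed by
 * an extend event when the delta is too large.
 */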
static struct ring_buffer_event *
rb_reserve_next(struct hyp_rb_per_cpu *cpu_buffer, unsigned long length)
{
	unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
	struct hyp_buffer_page *tail_page = cpu_buffer->tail_page;
	struct ring_buffer_event *event;
	unsigned long write, prev_write;
	u64 ts, time_delta;

	ts = trace_clock();

	time_delta = ts - cpu_buffer->write_stamp;

	if (test_time_stamp(time_delta))
		ts_ext_size = 8;

	prev_write = tail_page->write;
	write = prev_write + event_size + ts_ext_size;

	if (unlikely(write > BUF_PAGE_SIZE))
		tail_page = rb_move_tail(cpu_buffer);

	if (!tail_page->entries) {
		tail_page->page->time_stamp = ts;
		time_delta = 0;
		ts_ext_size = 0;
		write = event_size;
		prev_write = 0;
	}

	tail_page->write = write;
	tail_page->entries++;

	cpu_buffer->write_stamp = ts;

	event = (struct ring_buffer_event *)(tail_page->page->data + prev_write);
	if (ts_ext_size) {
		event = rb_add_ts_extend(event, time_delta);
		time_delta = 0;
	}

	event->type_len = 0;
	event->time_delta = time_delta;
	event->array[0] = event_size - RB_EVNT_HDR_SIZE;

	return event;
}

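/*
 * Entry points for hypervisor event writers: tracing_reserve_entry() reserves
 * room for an event payload (and marks the CPU buffer as being written to),
 * tracing_commit_entry() publishes the event and releases the buffer.
 */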
void *tracing_reserve_entry(unsigned long length)
{
	struct hyp_rb_per_cpu *cpu_buffer = this_cpu_ptr(&trace_rb);
	struct ring_buffer_event *rb_event;

	if (atomic_cmpxchg(&cpu_buffer->status, HYP_RB_READY, HYP_RB_WRITING) ==
	    HYP_RB_UNAVAILABLE)
		return NULL;

	rb_event = rb_reserve_next(cpu_buffer, length);

	return &rb_event->array[1];
}

void tracing_commit_entry(void)
{
	struct hyp_rb_per_cpu *cpu_buffer = this_cpu_ptr(&trace_rb);

	local_set(&cpu_buffer->tail_page->page->commit,
		  cpu_buffer->tail_page->write);
	WRITE_ONCE(cpu_buffer->meta->entries,
		   cpu_buffer->meta->entries + 1);

	/* Paired with rb_cpu_disable_writing() */
	atomic_set_release(&cpu_buffer->status, HYP_RB_READY);
}

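/*
 * Pin one host-shared data page into the hypervisor and attach it to its
 * hyp_buffer_page descriptor.
 */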
static int rb_page_init(struct hyp_buffer_page *bpage, unsigned long hva)
{
	void *hyp_va = (void *)kern_hyp_va(hva);
	int ret;

	ret = hyp_pin_shared_mem(hyp_va, hyp_va + PAGE_SIZE);
	if (ret)
		return ret;

	INIT_LIST_HEAD(&bpage->list);
	bpage->page = (struct buffer_data_page *)hyp_va;

	local_set(&bpage->page->commit, 0);

	return 0;
}

static bool rb_cpu_loaded(struct hyp_rb_per_cpu *cpu_buffer)
{
	return !!cpu_buffer->bpages;
}

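/*
 * Flip the buffer to UNAVAILABLE, spinning until any in-flight writer has
 * released it (i.e. the status is no longer WRITING).
 */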
static void rb_cpu_disable_writing(struct hyp_rb_per_cpu *cpu_buffer)
{
	int prev_status;

	/* Wait for release of the buffer */
	do {
		prev_status = atomic_cmpxchg_acquire(&cpu_buffer->status,
						     HYP_RB_READY,
						     HYP_RB_UNAVAILABLE);
	} while (prev_status == HYP_RB_WRITING);
}

static int rb_cpu_enable_writing(struct hyp_rb_per_cpu *cpu_buffer)
{
	if (!rb_cpu_loaded(cpu_buffer))
		return -ENODEV;

	atomic_cmpxchg(&cpu_buffer->status, HYP_RB_UNAVAILABLE, HYP_RB_READY);

	return 0;
}

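/*
 * Stop the writer, then unpin the meta page and every data page before
 * freeing the hyp_buffer_page array.
 */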
static void rb_cpu_teardown(struct hyp_rb_per_cpu *cpu_buffer)
{
	int i;

	if (!rb_cpu_loaded(cpu_buffer))
		return;

	rb_cpu_disable_writing(cpu_buffer);

	hyp_unpin_shared_mem((void *)cpu_buffer->meta,
			     (void *)(cpu_buffer->meta) + PAGE_SIZE);

	for (i = 0; i < cpu_buffer->nr_pages; i++) {
		struct hyp_buffer_page *bpage = &cpu_buffer->bpages[i];

		if (!bpage->page)
			continue;

		hyp_unpin_shared_mem((void *)bpage->page,
				     (void *)bpage->page + PAGE_SIZE);
	}

	hyp_free(cpu_buffer->bpages);
	cpu_buffer->bpages = NULL;
}

static bool rb_cpu_fits_desc(struct rb_page_desc *pdesc,
			     unsigned long desc_end)
{
	unsigned long *end;

	/* Check we can at least read nr_page_va */
	if ((unsigned long)&pdesc->nr_page_va >= desc_end)
		return false;

	end = &pdesc->page_va[pdesc->nr_page_va];

	return (unsigned long)end <= desc_end;
}

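/*
 * Build one per-CPU ring buffer from the host-provided page descriptors:
 * allocate the hyp_buffer_page array, pin and reset the shared meta page,
 * then link the data pages into a ring, keeping page 0 aside as the initial
 * reader page.
 */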
static int rb_cpu_init(struct rb_page_desc *pdesc, struct hyp_rb_per_cpu *cpu_buffer)
{
	struct hyp_buffer_page *bpage;
	int i, ret;

	/* At least one reader page and one head page */
	if (pdesc->nr_page_va < 2)
		return -EINVAL;

	if (rb_cpu_loaded(cpu_buffer))
		return -EBUSY;

	bpage = hyp_alloc(sizeof(*bpage) * pdesc->nr_page_va);
	if (!bpage)
		return hyp_alloc_errno();
	cpu_buffer->bpages = bpage;

	cpu_buffer->meta = (struct ring_buffer_meta *)kern_hyp_va(pdesc->meta_va);
	ret = hyp_pin_shared_mem((void *)cpu_buffer->meta,
				 ((void *)cpu_buffer->meta) + PAGE_SIZE);
	if (ret) {
		hyp_free(cpu_buffer->bpages);
		return ret;
	}

	memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
	cpu_buffer->meta->meta_page_size = PAGE_SIZE;
	cpu_buffer->meta->nr_data_pages = cpu_buffer->nr_pages;

	/* The reader page is not part of the ring initially */
	ret = rb_page_init(bpage, pdesc->page_va[0]);
	if (ret)
		goto err;

	cpu_buffer->nr_pages = 1;

	cpu_buffer->reader_page = bpage;
	cpu_buffer->tail_page = bpage + 1;
	cpu_buffer->head_page = bpage + 1;

	for (i = 1; i < pdesc->nr_page_va; i++) {
		ret = rb_page_init(++bpage, pdesc->page_va[i]);
		if (ret)
			goto err;

		bpage->list.next = &(bpage + 1)->list;
		bpage->list.prev = &(bpage - 1)->list;
		bpage->id = i;

		cpu_buffer->nr_pages = i + 1;
	}

	/* Close the ring */
	bpage->list.next = &cpu_buffer->tail_page->list;
	cpu_buffer->tail_page->list.prev = &bpage->list;

	/* The last init'ed page points to the head page */
	rb_set_flag(bpage, HYP_RB_PAGE_HEAD);

	cpu_buffer->last_overrun = 0;

	return 0;

err:
	rb_cpu_teardown(cpu_buffer);

	return ret;
}

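/*
 * Swap the reader page of @cpu's ring buffer so the host can consume the
 * current head page.
 */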
int __pkvm_swap_reader_tracing(int cpu)
{
	struct hyp_rb_per_cpu *cpu_buffer;
	int ret = 0;

	hyp_spin_lock(&trace_rb_lock);

	if (cpu >= hyp_nr_cpus) {
		ret = -EINVAL;
		goto err;
	}

	cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
	if (!rb_cpu_loaded(cpu_buffer))
		ret = -ENODEV;
	else
		ret = rb_swap_reader_page(cpu_buffer);
err:
	hyp_spin_unlock(&trace_rb_lock);

	return ret;
}

static void __pkvm_teardown_tracing_locked(void)
{
	int cpu;

	hyp_assert_lock_held(&trace_rb_lock);

	for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
		struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

		rb_cpu_teardown(cpu_buffer);
	}
}

void __pkvm_teardown_tracing(void)
{
	hyp_spin_lock(&trace_rb_lock);
	__pkvm_teardown_tracing_locked();
	hyp_spin_unlock(&trace_rb_lock);
}

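/*
 * Load the tracing buffers described by the host: the descriptor pages are
 * temporarily donated to the hypervisor while their content is validated and
 * each per-CPU ring buffer is initialised, then handed back to the host.
 */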
int __pkvm_load_tracing(unsigned long desc_hva, size_t desc_size)
{
	struct hyp_trace_desc *desc = (struct hyp_trace_desc *)kern_hyp_va(desc_hva);
	struct trace_page_desc *trace_pdesc = &desc->page_desc;
	struct rb_page_desc *pdesc;
	int ret, cpu;

	if (!desc_size || !PAGE_ALIGNED(desc_hva) || !PAGE_ALIGNED(desc_size))
		return -EINVAL;

	ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn((void *)desc),
				     desc_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	hyp_spin_lock(&trace_rb_lock);

	trace_clock_update(&desc->clock_data);

	for_each_rb_page_desc(pdesc, cpu, trace_pdesc) {
		struct hyp_rb_per_cpu *cpu_buffer;
		int cpu;

		ret = -EINVAL;
		if (!rb_cpu_fits_desc(pdesc, desc_hva + desc_size))
			break;

		cpu = pdesc->cpu;
		if (cpu >= hyp_nr_cpus)
			break;

		cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

		ret = rb_cpu_init(pdesc, cpu_buffer);
		if (ret)
			break;
	}

	if (ret)
		__pkvm_teardown_tracing_locked();

	hyp_spin_unlock(&trace_rb_lock);

	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn((void *)desc),
				       desc_size >> PAGE_SHIFT));

	return ret;
}

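/*
 * Enable or disable writing on all loaded per-CPU buffers. Enabling succeeds
 * if at least one CPU buffer could be made ready; disabling always succeeds.
 */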
int __pkvm_enable_tracing(bool enable)
{
	int cpu, ret = enable ? -EINVAL : 0;

	hyp_spin_lock(&trace_rb_lock);
	for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
		struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

		if (enable) {
			if (!rb_cpu_enable_writing(cpu_buffer))
				ret = 0;
		} else {
			rb_cpu_disable_writing(cpu_buffer);
		}
	}
	hyp_spin_unlock(&trace_rb_lock);

	return ret;
}