// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2023 Google LLC
 * Author: Vincent Donnefort <vdonnefort@google.com>
 */

#include <nvhe/alloc.h>
#include <nvhe/clock.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trace/trace.h>

#include <asm/percpu.h>
#include <asm/kvm_mmu.h>
#include <asm/local.h>

#define HYP_RB_PAGE_HEAD		1UL
#define HYP_RB_PAGE_UPDATE		2UL
#define HYP_RB_FLAG_MASK		3UL

struct hyp_buffer_page {
	struct list_head	list;
	struct buffer_data_page	*page;
	unsigned long		write;
	unsigned long		entries;
	u32			id;
};

struct hyp_rb_per_cpu {
	struct ring_buffer_meta	*meta;
	struct hyp_buffer_page	*tail_page;
	struct hyp_buffer_page	*reader_page;
	struct hyp_buffer_page	*head_page;
	struct hyp_buffer_page	*bpages;
	unsigned long		nr_pages;
	unsigned long		last_overrun;
	u64			write_stamp;
	atomic_t		status;
};

#define HYP_RB_UNAVAILABLE	0
#define HYP_RB_READY		1
#define HYP_RB_WRITING		2

DEFINE_PER_CPU(struct hyp_rb_per_cpu, trace_rb);
DEFINE_HYP_SPINLOCK(trace_rb_lock);

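/*
 * As in the kernel ring buffer, the list.next pointers double as state flags:
 * the two low bits carry HYP_RB_PAGE_HEAD/HYP_RB_PAGE_UPDATE, while the upper
 * bits still point to the next struct list_head. rb_set_flag() atomically
 * replaces the flag bits of bpage->list.next with new_flag.
 */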
static bool rb_set_flag(struct hyp_buffer_page *bpage, int new_flag)
{
	unsigned long ret, val = (unsigned long)bpage->list.next;

	ret = cmpxchg((unsigned long *)&bpage->list.next,
		      val, (val & ~HYP_RB_FLAG_MASK) | new_flag);

	return ret == val;
}

static struct hyp_buffer_page *rb_hyp_buffer_page(struct list_head *list)
{
	unsigned long ptr = (unsigned long)list & ~HYP_RB_FLAG_MASK;

	return container_of((struct list_head *)ptr, struct hyp_buffer_page, list);
}

static struct hyp_buffer_page *rb_next_page(struct hyp_buffer_page *bpage)
{
	return rb_hyp_buffer_page(bpage->list.next);
}

static bool rb_is_head_page(struct hyp_buffer_page *bpage)
{
	return (unsigned long)bpage->list.prev->next & HYP_RB_PAGE_HEAD;
}

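/*
 * Find the page currently flagged HYP_RB_PAGE_HEAD, starting from the cached
 * cpu_buffer->head_page, and update the cache. The writer may be moving the
 * head concurrently (rb_move_tail()), hence the bounded retry.
 */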
static struct hyp_buffer_page *rb_set_head_page(struct hyp_rb_per_cpu *cpu_buffer)
{
	struct hyp_buffer_page *bpage, *prev_head;
	int cnt = 0;
again:
	bpage = prev_head = cpu_buffer->head_page;
	do {
		if (rb_is_head_page(bpage)) {
			cpu_buffer->head_page = bpage;
			return bpage;
		}

		bpage = rb_next_page(bpage);
	} while (bpage != prev_head);

	/* We might have raced with the writer, let's try again */
	if (++cnt < 3)
		goto again;

	return NULL;
}

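/*
 * Swap the reader page with the current head page so the host can consume it
 * without the writer overwriting it: splice the reader page where the head
 * page was, publish it as the new head, and hand the old head page out as the
 * new reader page. Lost-event accounting is derived from the overrun counter.
 */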
static int rb_swap_reader_page(struct hyp_rb_per_cpu *cpu_buffer)
{
	unsigned long *old_head_link, old_link_val, new_link_val, overrun;
	struct hyp_buffer_page *head, *reader = cpu_buffer->reader_page;
spin:
	/* Update cpu_buffer->head_page according to HYP_RB_PAGE_HEAD */
	head = rb_set_head_page(cpu_buffer);
	if (!head)
		return -ENODEV;

	/* Connect the reader page around the head page */
	reader->list.next = head->list.next;
	reader->list.prev = head->list.prev;

	/* The reader page points to the new head page */
	rb_set_flag(reader, HYP_RB_PAGE_HEAD);

	/*
	 * Paired with the cmpxchg in rb_move_tail(). Order the read of the head
	 * page and overrun.
	 */
	smp_mb();
	overrun = READ_ONCE(cpu_buffer->meta->overrun);

	/* Try to swap the prev head link to the reader page */
	old_head_link = (unsigned long *)&reader->list.prev->next;
	old_link_val = (*old_head_link & ~HYP_RB_FLAG_MASK) | HYP_RB_PAGE_HEAD;
	new_link_val = (unsigned long)&reader->list;
	if (cmpxchg(old_head_link, old_link_val, new_link_val)
		      != old_link_val)
		goto spin;

	cpu_buffer->head_page = rb_hyp_buffer_page(reader->list.next);
	cpu_buffer->head_page->list.prev = &reader->list;
	cpu_buffer->reader_page = head;
	cpu_buffer->meta->reader_page.lost_events = overrun - cpu_buffer->last_overrun;
	cpu_buffer->meta->reader_page.id = cpu_buffer->reader_page->id;
	cpu_buffer->last_overrun = overrun;

	return 0;
}

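/*
 * Advance the tail to the next page. If the tail runs into the head page, the
 * buffer is full: push the head forward, account the overwritten entries in
 * meta->overrun and meta->pages_lost, then recycle the page for writing.
 */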
static struct hyp_buffer_page *
rb_move_tail(struct hyp_rb_per_cpu *cpu_buffer)
{
	struct hyp_buffer_page *tail_page, *new_tail, *new_head;

	tail_page = cpu_buffer->tail_page;
	new_tail = rb_next_page(tail_page);
again:
	/*
	 * We caught the reader ... Let's try to move the head page.
	 * The writer can only rely on ->next links to check if this is head.
	 */
	if ((unsigned long)tail_page->list.next & HYP_RB_PAGE_HEAD) {
		/* The reader moved the head in between */
		if (!rb_set_flag(tail_page, HYP_RB_PAGE_UPDATE))
			goto again;

		WRITE_ONCE(cpu_buffer->meta->overrun,
			   cpu_buffer->meta->overrun + new_tail->entries);
		WRITE_ONCE(cpu_buffer->meta->pages_lost,
			   cpu_buffer->meta->pages_lost + 1);

		/* Move the head */
		rb_set_flag(new_tail, HYP_RB_PAGE_HEAD);

		/* The new head is in place, reset the update flag */
		rb_set_flag(tail_page, 0);

		new_head = rb_next_page(new_tail);
	}

	local_set(&new_tail->page->commit, 0);

	new_tail->write = 0;
	new_tail->entries = 0;

	WRITE_ONCE(cpu_buffer->meta->pages_touched,
		   cpu_buffer->meta->pages_touched + 1);
	cpu_buffer->tail_page = new_tail;

	return new_tail;
}

unsigned long rb_event_size(unsigned long length)
{
	struct ring_buffer_event *event;

	return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
}

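/*
 * Emit a TIME_EXTEND event carrying the full timestamp delta and return the
 * location right after it (8 bytes further), where the actual event goes.
 */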
static struct ring_buffer_event *
rb_add_ts_extend(struct ring_buffer_event *event, u64 delta)
{
	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
	event->time_delta = delta & TS_MASK;
	event->array[0] = delta >> TS_SHIFT;

	return (struct ring_buffer_event *)((unsigned long)event + 8);
}

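/*
 * Reserve room for an event of @length bytes on the tail page. A TIME_EXTEND
 * event is prepended when the delta since the last write does not fit in the
 * event's 27-bit time_delta field; the first event of a page instead resets
 * the page timestamp and uses a zero delta.
 */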
static struct ring_buffer_event *
rb_reserve_next(struct hyp_rb_per_cpu *cpu_buffer, unsigned long length)
{
	unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
	struct hyp_buffer_page *tail_page = cpu_buffer->tail_page;
	struct ring_buffer_event *event;
	unsigned long write, prev_write;
	u64 ts, time_delta;

	ts = trace_clock();

	time_delta = ts - cpu_buffer->write_stamp;

	if (test_time_stamp(time_delta))
		ts_ext_size = 8;

	prev_write = tail_page->write;
	write = prev_write + event_size + ts_ext_size;

	if (unlikely(write > BUF_PAGE_SIZE))
		tail_page = rb_move_tail(cpu_buffer);

	if (!tail_page->entries) {
		tail_page->page->time_stamp = ts;
		time_delta = 0;
		ts_ext_size = 0;
		write = event_size;
		prev_write = 0;
	}

	tail_page->write = write;
	tail_page->entries++;

	cpu_buffer->write_stamp = ts;

	event = (struct ring_buffer_event *)(tail_page->page->data +
					     prev_write);
	if (ts_ext_size) {
		event = rb_add_ts_extend(event, time_delta);
		time_delta = 0;
	}

	event->type_len = 0;
	event->time_delta = time_delta;
	event->array[0] = event_size - RB_EVNT_HDR_SIZE;

	return event;
}

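/*
 * Reserve space for an event payload of @length bytes on the local CPU ring
 * buffer. Returns NULL if the buffer is unavailable, otherwise a pointer to
 * the payload; the buffer stays in HYP_RB_WRITING state until
 * tracing_commit_entry().
 */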
void *tracing_reserve_entry(unsigned long length)
{
	struct hyp_rb_per_cpu *cpu_buffer = this_cpu_ptr(&trace_rb);
	struct ring_buffer_event *rb_event;

	if (atomic_cmpxchg(&cpu_buffer->status, HYP_RB_READY, HYP_RB_WRITING)
	    == HYP_RB_UNAVAILABLE)
		return NULL;

	rb_event = rb_reserve_next(cpu_buffer, length);

	return &rb_event->array[1];
}

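/*
 * Commit the previously reserved event: publish the tail page write offset as
 * the commit, bump the entries count and release the buffer back to
 * HYP_RB_READY.
 */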
void tracing_commit_entry(void)
{
	struct hyp_rb_per_cpu *cpu_buffer = this_cpu_ptr(&trace_rb);

	local_set(&cpu_buffer->tail_page->page->commit,
		  cpu_buffer->tail_page->write);
	WRITE_ONCE(cpu_buffer->meta->entries,
		   cpu_buffer->meta->entries + 1);

	/* Paired with rb_cpu_disable_writing() */
	atomic_set_release(&cpu_buffer->status, HYP_RB_READY);
}

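/*
 * Pin a page shared by the host and use it as a buffer data page.
 */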
static int rb_page_init(struct hyp_buffer_page *bpage, unsigned long hva)
{
	void *hyp_va = (void *)kern_hyp_va(hva);
	int ret;

	ret = hyp_pin_shared_mem(hyp_va, hyp_va + PAGE_SIZE);
	if (ret)
		return ret;

	INIT_LIST_HEAD(&bpage->list);
	bpage->page = (struct buffer_data_page *)hyp_va;

	local_set(&bpage->page->commit, 0);

	return 0;
}

static bool rb_cpu_loaded(struct hyp_rb_per_cpu *cpu_buffer)
{
	return !!cpu_buffer->bpages;
}

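/*
 * Prevent any further writing, spinning until an in-flight writer (status
 * HYP_RB_WRITING) has released the buffer. Paired with the release in
 * tracing_commit_entry().
 */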
static void rb_cpu_disable_writing(struct hyp_rb_per_cpu *cpu_buffer)
{
	int prev_status;

	/* Wait for release of the buffer */
	do {
		prev_status = atomic_cmpxchg_acquire(&cpu_buffer->status,
						     HYP_RB_READY,
						     HYP_RB_UNAVAILABLE);
	} while (prev_status == HYP_RB_WRITING);
}

static int rb_cpu_enable_writing(struct hyp_rb_per_cpu *cpu_buffer)
{
	if (!rb_cpu_loaded(cpu_buffer))
		return -ENODEV;

	atomic_cmpxchg(&cpu_buffer->status, HYP_RB_UNAVAILABLE, HYP_RB_READY);

	return 0;
}

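/*
 * Stop writing, unpin the meta and data pages shared with the host and free
 * the hyp_buffer_page array.
 */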
static void rb_cpu_teardown(struct hyp_rb_per_cpu *cpu_buffer)
{
	int i;

	if (!rb_cpu_loaded(cpu_buffer))
		return;

	rb_cpu_disable_writing(cpu_buffer);

	hyp_unpin_shared_mem((void *)cpu_buffer->meta,
			     (void *)(cpu_buffer->meta) + PAGE_SIZE);

	for (i = 0; i < cpu_buffer->nr_pages; i++) {
		struct hyp_buffer_page *bpage = &cpu_buffer->bpages[i];

		if (!bpage->page)
			continue;

		hyp_unpin_shared_mem((void *)bpage->page,
				     (void *)bpage->page + PAGE_SIZE);
	}

	hyp_free(cpu_buffer->bpages);
	cpu_buffer->bpages = 0;
}

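/*
 * Sanity check that the whole rb_page_desc, including its page_va[] array,
 * lies within the donated descriptor area.
 */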
static bool rb_cpu_fits_desc(struct rb_page_desc *pdesc,
			     unsigned long desc_end)
{
	unsigned long *end;

	/* Check we can at least read nr_page_va */
	if ((unsigned long)&pdesc->nr_page_va >= desc_end)
		return false;

	end = &pdesc->page_va[pdesc->nr_page_va];

	return (unsigned long)end <= desc_end;
}

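/*
 * Build the per-CPU ring buffer from the host-provided page descriptor: pin
 * the meta page, use the first data page as the reader page and link the
 * remaining pages into a ring. The HEAD flag on the last page's ->next link
 * marks the first ring page as the head.
 */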
static int rb_cpu_init(struct rb_page_desc *pdesc, struct hyp_rb_per_cpu *cpu_buffer)
{
	struct hyp_buffer_page *bpage;
	int i, ret;

	/* At least one reader page and one head page */
	if (pdesc->nr_page_va < 2)
		return -EINVAL;

	if (rb_cpu_loaded(cpu_buffer))
		return -EBUSY;

	bpage = hyp_alloc(sizeof(*bpage) * pdesc->nr_page_va);
	if (!bpage)
		return hyp_alloc_errno();
	cpu_buffer->bpages = bpage;

	cpu_buffer->meta = (struct ring_buffer_meta *)kern_hyp_va(pdesc->meta_va);
	ret = hyp_pin_shared_mem((void *)cpu_buffer->meta,
				 ((void *)cpu_buffer->meta) + PAGE_SIZE);
	if (ret) {
		hyp_free(cpu_buffer->bpages);
		return ret;
	}

	memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
	cpu_buffer->meta->meta_page_size = PAGE_SIZE;
	cpu_buffer->meta->nr_data_pages = cpu_buffer->nr_pages;

	/* The reader page is not part of the ring initially */
	ret = rb_page_init(bpage, pdesc->page_va[0]);
	if (ret)
		goto err;

	cpu_buffer->nr_pages = 1;

	cpu_buffer->reader_page = bpage;
	cpu_buffer->tail_page = bpage + 1;
	cpu_buffer->head_page = bpage + 1;

	for (i = 1; i < pdesc->nr_page_va; i++) {
		ret = rb_page_init(++bpage, pdesc->page_va[i]);
		if (ret)
			goto err;

		bpage->list.next = &(bpage + 1)->list;
		bpage->list.prev = &(bpage - 1)->list;
		bpage->id = i;

		cpu_buffer->nr_pages = i + 1;
	}

	/* Close the ring */
	bpage->list.next = &cpu_buffer->tail_page->list;
	cpu_buffer->tail_page->list.prev = &bpage->list;

	/* The last init'ed page points to the head page */
	rb_set_flag(bpage, HYP_RB_PAGE_HEAD);

	cpu_buffer->last_overrun = 0;

	return 0;
err:
	rb_cpu_teardown(cpu_buffer);

	return ret;
}

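/*
 * Host hypercall: swap the reader page of @cpu's ring buffer with its current
 * head page so the host can read it.
 */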
int __pkvm_swap_reader_tracing(int cpu)
{
	struct hyp_rb_per_cpu *cpu_buffer;
	int ret = 0;

	hyp_spin_lock(&trace_rb_lock);

	if (cpu >= hyp_nr_cpus) {
		ret = -EINVAL;
		goto err;
	}

	cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
	if (!rb_cpu_loaded(cpu_buffer))
		ret = -ENODEV;
	else
		ret = rb_swap_reader_page(cpu_buffer);
err:
	hyp_spin_unlock(&trace_rb_lock);

	return ret;
}

static void __pkvm_teardown_tracing_locked(void)
{
	int cpu;

	hyp_assert_lock_held(&trace_rb_lock);

	for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
		struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

		rb_cpu_teardown(cpu_buffer);
	}
}

void __pkvm_teardown_tracing(void)
{
	hyp_spin_lock(&trace_rb_lock);
	__pkvm_teardown_tracing_locked();
	hyp_spin_unlock(&trace_rb_lock);
}

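/*
 * Host hypercall: borrow the trace descriptor pages from the host, set up one
 * ring buffer per described CPU, then hand the descriptor back. Tears
 * everything down again on failure.
 */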
int __pkvm_load_tracing(unsigned long desc_hva, size_t desc_size)
{
	struct hyp_trace_desc *desc = (struct hyp_trace_desc *)kern_hyp_va(desc_hva);
	struct trace_page_desc *trace_pdesc = &desc->page_desc;
	struct rb_page_desc *pdesc;
	int ret, cpu;

	if (!desc_size || !PAGE_ALIGNED(desc_hva) || !PAGE_ALIGNED(desc_size))
		return -EINVAL;

	ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn((void *)desc),
				     desc_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	hyp_spin_lock(&trace_rb_lock);

	trace_clock_update(&desc->clock_data);

	for_each_rb_page_desc(pdesc, cpu, trace_pdesc) {
		struct hyp_rb_per_cpu *cpu_buffer;
		int cpu;

		ret = -EINVAL;
		if (!rb_cpu_fits_desc(pdesc, desc_hva + desc_size))
			break;

		cpu = pdesc->cpu;
		if (cpu >= hyp_nr_cpus)
			break;

		cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

		ret = rb_cpu_init(pdesc, cpu_buffer);
		if (ret)
			break;
	}
	if (ret)
		__pkvm_teardown_tracing_locked();

	hyp_spin_unlock(&trace_rb_lock);

	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn((void *)desc),
				       desc_size >> PAGE_SHIFT));
	return ret;
}

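/*
 * Host hypercall: start or stop writing on all loaded per-CPU buffers.
 * Enabling succeeds if at least one CPU buffer could be made ready.
 */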
int __pkvm_enable_tracing(bool enable)
{
	int cpu, ret = enable ? -EINVAL : 0;

	hyp_spin_lock(&trace_rb_lock);
	for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
		struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

		if (enable) {
			if (!rb_cpu_enable_writing(cpu_buffer))
				ret = 0;
		} else {
			rb_cpu_disable_writing(cpu_buffer);
		}
	}
	hyp_spin_unlock(&trace_rb_lock);

	return ret;
}