1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * A memslot-related performance benchmark.
4 *
5 * Copyright (C) 2021 Oracle and/or its affiliates.
6 *
7 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
8 */
9 #include <pthread.h>
10 #include <sched.h>
11 #include <semaphore.h>
12 #include <stdatomic.h>
13 #include <stdbool.h>
14 #include <stdint.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/mman.h>
19 #include <time.h>
20 #include <unistd.h>
21
22 #include <linux/compiler.h>
23
24 #include <test_util.h>
25 #include <kvm_util.h>
26 #include <processor.h>
27
28 #define VCPU_ID 0
29
30 #define MEM_SIZE ((512U << 20) + 4096)
31 #define MEM_SIZE_PAGES (MEM_SIZE / 4096)
32 #define MEM_GPA 0x10000000UL
33 #define MEM_AUX_GPA MEM_GPA
34 #define MEM_SYNC_GPA MEM_AUX_GPA
35 #define MEM_TEST_GPA (MEM_AUX_GPA + 4096)
36 #define MEM_TEST_SIZE (MEM_SIZE - 4096)
37 static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
38 static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
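/*
 * Resulting guest physical address layout (derived from the macros above):
 *   MEM_GPA (0x10000000)          - first page, used as the aux / sync area
 *   MEM_TEST_GPA (MEM_GPA + 4096) - start of the actual test area,
 *                                   MEM_TEST_SIZE bytes long
 */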
39
40 /*
41 * 32 MiB is the maximum size that still gets well over 100 iterations
42 * with 509 slots. Considering that each slot needs at least one page,
43 * up to 8194 slots in use can then be tested (although with slightly
44 * limited resolution).
45 */
46 #define MEM_SIZE_MAP ((32U << 20) + 4096)
47 #define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096)
48 #define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096)
49 #define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
50 static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
51 static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
52 static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
53 static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");
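/*
 * Worked out: MEM_SIZE_MAP_PAGES = (32 << 20) / 4096 + 1 = 8193 pages.
 * The test memslots are numbered from 1 (slot 0 is left to the VM's own
 * memory, see prepare_vm()), which is presumably where the 8194 figure
 * above comes from.
 */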
54
55 /*
56 * 128 MiB is the minimum size that fills 32k slots with at least one page
57 * each while still getting 100+ iterations in such a test
58 */
59 #define MEM_TEST_UNMAP_SIZE (128U << 20)
60 #define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096)
61 /* 2 MiB chunk size like a typical huge page */
62 #define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12))
63 static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
64 "invalid unmap test region size");
65 static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
66 "invalid unmap test region size");
67 static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
68 (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
69 "invalid unmap test region size");
70
71 /*
72 * For the move active test the middle of the test area is placed on
73 * a memslot boundary: half lies in the memslot being moved, half in
74 * other memslot(s).
75 *
76 * When running this test with 32k memslots (32764, really), each memslot
77 * contains 4 pages.
78 * The last one additionally contains the remaining 21 pages of memory,
79 * for a total of 25 pages.
80 * Hence, the maximum test area size here is 50 pages.
81 */
82 #define MEM_TEST_MOVE_SIZE_PAGES (50)
83 #define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096)
84 #define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
85 static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
86 "invalid move test region size");
87
88 #define MEM_TEST_VAL_1 0x1122334455667788
89 #define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
90
91 struct vm_data {
92 struct kvm_vm *vm;
93 pthread_t vcpu_thread;
94 uint32_t nslots;
95 uint64_t npages;
96 uint64_t pages_per_slot;
97 void **hva_slots;
98 bool mmio_ok;
99 uint64_t mmio_gpa_min;
100 uint64_t mmio_gpa_max;
101 };
102
103 struct sync_area {
104 atomic_bool start_flag;
105 atomic_bool exit_flag;
106 atomic_bool sync_flag;
107 void *move_area_ptr;
108 };
109
110 /*
111 * Technically, we also need the atomic bool to be address-free, which
112 * is recommended, but not strictly required, by C11 for lockless
113 * implementations.
114 * However, in practice both GCC and Clang fulfill this requirement on
115 * all KVM-supported platforms.
116 */
117 static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
118
119 static sem_t vcpu_ready;
120
121 static bool map_unmap_verify;
122
123 static bool verbose;
124 #define pr_info_v(...) \
125 do { \
126 if (verbose) \
127 pr_info(__VA_ARGS__); \
128 } while (0)
129
130 static void check_mmio_access(struct vm_data *vm, struct kvm_run *run)
131 {
132 TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
133 TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
134 TEST_ASSERT(run->mmio.len == 8,
135 "Unexpected exit mmio size = %u", run->mmio.len);
136 TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
137 run->mmio.phys_addr <= vm->mmio_gpa_max,
138 "Unexpected exit mmio address = 0x%llx",
139 run->mmio.phys_addr);
140 }
141
142 static void *vcpu_worker(void *data)
143 {
144 struct vm_data *vm = data;
145 struct kvm_run *run;
146 struct ucall uc;
147
148 run = vcpu_state(vm->vm, VCPU_ID);
149 while (1) {
150 vcpu_run(vm->vm, VCPU_ID);
151
152 switch (get_ucall(vm->vm, VCPU_ID, &uc)) {
153 case UCALL_SYNC:
154 TEST_ASSERT(uc.args[1] == 0,
155 "Unexpected sync ucall, got %lx",
156 (ulong)uc.args[1]);
157 sem_post(&vcpu_ready);
158 continue;
159 case UCALL_NONE:
160 if (run->exit_reason == KVM_EXIT_MMIO)
161 check_mmio_access(vm, run);
162 else
163 goto done;
164 break;
165 case UCALL_ABORT:
166 TEST_FAIL("%s at %s:%ld, val = %lu",
167 (const char *)uc.args[0],
168 __FILE__, uc.args[1], uc.args[2]);
169 break;
170 case UCALL_DONE:
171 goto done;
172 default:
173 TEST_FAIL("Unknown ucall %lu", uc.cmd);
174 }
175 }
176
177 done:
178 return NULL;
179 }
180
181 static void wait_for_vcpu(void)
182 {
183 struct timespec ts;
184
185 TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
186 "clock_gettime() failed: %d\n", errno);
187
188 ts.tv_sec += 2;
189 TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
190 "sem_timedwait() failed: %d\n", errno);
191 }
192
193 static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
194 {
195 uint64_t gpage, pgoffs;
196 uint32_t slot, slotoffs;
197 void *base;
198
199 TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
200 TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
201 "Too high gpa to translate");
202 gpa -= MEM_GPA;
203
204 gpage = gpa / 4096;
205 pgoffs = gpa % 4096;
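/*
 * Every slot holds pages_per_slot pages, except the last one, which also
 * holds the remainder; clamp to the last slot so that addresses in the
 * remainder area still resolve to it.
 */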
206 slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
207 slotoffs = gpage - (slot * data->pages_per_slot);
208
209 if (rempages) {
210 uint64_t slotpages;
211
212 if (slot == data->nslots - 1)
213 slotpages = data->npages - slot * data->pages_per_slot;
214 else
215 slotpages = data->pages_per_slot;
216
217 TEST_ASSERT(!pgoffs,
218 "Asking for remaining pages in slot but gpa not page aligned");
219 *rempages = slotpages - slotoffs;
220 }
221
222 base = data->hva_slots[slot];
223 return (uint8_t *)base + slotoffs * 4096 + pgoffs;
224 }
225
226 static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
227 {
228 TEST_ASSERT(slot < data->nslots, "Too high slot number");
229
230 return MEM_GPA + slot * data->pages_per_slot * 4096;
231 }
232
233 static struct vm_data *alloc_vm(void)
234 {
235 struct vm_data *data;
236
237 data = malloc(sizeof(*data));
238 TEST_ASSERT(data, "malloc(vmdata) failed");
239
240 data->vm = NULL;
241 data->hva_slots = NULL;
242
243 return data;
244 }
245
246 static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
247 void *guest_code, uint64_t mempages,
248 struct timespec *slot_runtime)
249 {
250 uint32_t max_mem_slots;
251 uint64_t rempages;
252 uint64_t guest_addr;
253 uint32_t slot;
254 struct timespec tstart;
255 struct sync_area *sync;
256
257 max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
258 TEST_ASSERT(max_mem_slots > 1,
259 "KVM_CAP_NR_MEMSLOTS should be greater than 1");
260 TEST_ASSERT(nslots > 1 || nslots == -1,
261 "Slot count cap should be greater than 1");
262 if (nslots != -1)
263 max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
264 pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);
265
266 TEST_ASSERT(mempages > 1,
267 "Can't test without any memory");
268
269 data->npages = mempages;
270 data->nslots = max_mem_slots - 1;
271 data->pages_per_slot = mempages / data->nslots;
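/*
 * Fewer test pages than test memslots: report back (via *maxslots)
 * the largest memslot count cap this amount of memory can still back,
 * i.e. one page per test slot plus the VM's own slot 0.
 */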
272 if (!data->pages_per_slot) {
273 *maxslots = mempages + 1;
274 return false;
275 }
276
277 rempages = mempages % data->nslots;
278 data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
279 TEST_ASSERT(data->hva_slots, "malloc() fail");
280
281 data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
282 ucall_init(data->vm, NULL);
283
284 pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
285 max_mem_slots - 1, data->pages_per_slot, rempages);
286
287 clock_gettime(CLOCK_MONOTONIC, &tstart);
288 for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
289 uint64_t npages;
290
291 npages = data->pages_per_slot;
292 if (slot == max_mem_slots - 1)
293 npages += rempages;
294
295 vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
296 guest_addr, slot, npages,
297 0);
298 guest_addr += npages * 4096;
299 }
300 *slot_runtime = timespec_elapsed(tstart);
301
302 for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
303 uint64_t npages;
304 uint64_t gpa;
305
306 npages = data->pages_per_slot;
307 if (slot == max_mem_slots - 2)
308 npages += rempages;
309
310 gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
311 slot + 1);
312 TEST_ASSERT(gpa == guest_addr,
313 "vm_phy_pages_alloc() failed\n");
314
315 data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
316 memset(data->hva_slots[slot], 0, npages * 4096);
317
318 guest_addr += npages * 4096;
319 }
320
321 virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);
322
323 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
324 atomic_init(&sync->start_flag, false);
325 atomic_init(&sync->exit_flag, false);
326 atomic_init(&sync->sync_flag, false);
327
328 data->mmio_ok = false;
329
330 return true;
331 }
332
333 static void launch_vm(struct vm_data *data)
334 {
335 pr_info_v("Launching the test VM\n");
336
337 pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
338
339 /* Ensure the guest thread is spun up. */
340 wait_for_vcpu();
341 }
342
343 static void free_vm(struct vm_data *data)
344 {
345 kvm_vm_free(data->vm);
346 free(data->hva_slots);
347 free(data);
348 }
349
350 static void wait_guest_exit(struct vm_data *data)
351 {
352 pthread_join(data->vcpu_thread, NULL);
353 }
354
355 static void let_guest_run(struct sync_area *sync)
356 {
357 atomic_store_explicit(&sync->start_flag, true, memory_order_release);
358 }
359
360 static void guest_spin_until_start(void)
361 {
362 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
363
364 while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
365 ;
366 }
367
368 static void make_guest_exit(struct sync_area *sync)
369 {
370 atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
371 }
372
373 static bool _guest_should_exit(void)
374 {
375 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
376
377 return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
378 }
379
380 #define guest_should_exit() unlikely(_guest_should_exit())
381
382 /*
383 * noinline so we can easily see how much time the host spends waiting
384 * for the guest.
385 * For the same reason use alarm() instead of polling clock_gettime()
386 * to implement a wait timeout.
387 */
388 static noinline void host_perform_sync(struct sync_area *sync)
389 {
390 alarm(2);
391
392 atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
393 while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
394 ;
395
396 alarm(0);
397 }
398
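/*
 * Guest side of the handshake with host_perform_sync() above: atomically
 * consume sync_flag (flip it from true back to false), bailing out early
 * if the host has requested an exit.
 */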
399 static bool guest_perform_sync(void)
400 {
401 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
402 bool expected;
403
404 do {
405 if (guest_should_exit())
406 return false;
407
408 expected = true;
409 } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
410 &expected, false,
411 memory_order_acq_rel,
412 memory_order_relaxed));
413
414 return true;
415 }
416
417 static void guest_code_test_memslot_move(void)
418 {
419 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
420 uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
421
422 GUEST_SYNC(0);
423
424 guest_spin_until_start();
425
426 while (!guest_should_exit()) {
427 uintptr_t ptr;
428
429 for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
430 ptr += 4096)
431 *(uint64_t *)ptr = MEM_TEST_VAL_1;
432
433 /*
434 * No host sync here since the MMIO exits are so expensive
435 * that the host would spend most of its time waiting for
436 * the guest; instead of measuring memslot move performance
437 * we would then be measuring the performance and
438 * likelihood of MMIO exits.
439 */
440 }
441
442 GUEST_DONE();
443 }
444
445 static void guest_code_test_memslot_map(void)
446 {
447 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
448
449 GUEST_SYNC(0);
450
451 guest_spin_until_start();
452
453 while (1) {
454 uintptr_t ptr;
455
456 for (ptr = MEM_TEST_GPA;
457 ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
458 *(uint64_t *)ptr = MEM_TEST_VAL_1;
459
460 if (!guest_perform_sync())
461 break;
462
463 for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
464 ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
465 *(uint64_t *)ptr = MEM_TEST_VAL_2;
466
467 if (!guest_perform_sync())
468 break;
469 }
470
471 GUEST_DONE();
472 }
473
474 static void guest_code_test_memslot_unmap(void)
475 {
476 struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
477
478 GUEST_SYNC(0);
479
480 guest_spin_until_start();
481
482 while (1) {
483 uintptr_t ptr = MEM_TEST_GPA;
484
485 /*
486 * We can afford to access (map) just a small number of pages
487 * per host sync as otherwise the host would spend
488 * a significant amount of its time waiting for the guest
489 * (instead of doing unmap operations), which would
490 * effectively turn this test into a map performance test.
491 *
492 * Just access a single page to be on the safe side.
493 */
494 *(uint64_t *)ptr = MEM_TEST_VAL_1;
495
496 if (!guest_perform_sync())
497 break;
498
499 ptr += MEM_TEST_UNMAP_SIZE / 2;
500 *(uint64_t *)ptr = MEM_TEST_VAL_2;
501
502 if (!guest_perform_sync())
503 break;
504 }
505
506 GUEST_DONE();
507 }
508
509 static void guest_code_test_memslot_rw(void)
510 {
511 GUEST_SYNC(0);
512
513 guest_spin_until_start();
514
515 while (1) {
516 uintptr_t ptr;
517
518 for (ptr = MEM_TEST_GPA;
519 ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
520 *(uint64_t *)ptr = MEM_TEST_VAL_1;
521
522 if (!guest_perform_sync())
523 break;
524
525 for (ptr = MEM_TEST_GPA + 4096 / 2;
526 ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
527 uint64_t val = *(uint64_t *)ptr;
528
529 GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
530 *(uint64_t *)ptr = 0;
531 }
532
533 if (!guest_perform_sync())
534 break;
535 }
536
537 GUEST_DONE();
538 }
539
540 static bool test_memslot_move_prepare(struct vm_data *data,
541 struct sync_area *sync,
542 uint64_t *maxslots, bool isactive)
543 {
544 uint64_t movesrcgpa, movetestgpa;
545
546 movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
547
548 if (isactive) {
549 uint64_t lastpages;
550
551 vm_gpa2hva(data, movesrcgpa, &lastpages);
552 if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
553 *maxslots = 0;
554 return false;
555 }
556 }
557
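/*
 * For the active test the area straddles the base GPA of the last
 * (moved) memslot: half below it, half inside it.
 * For the inactive test the whole area lies just below that memslot.
 */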
558 movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
559 sync->move_area_ptr = (void *)movetestgpa;
560
561 if (isactive) {
562 data->mmio_ok = true;
563 data->mmio_gpa_min = movesrcgpa;
564 data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
565 }
566
567 return true;
568 }
569
570 static bool test_memslot_move_prepare_active(struct vm_data *data,
571 struct sync_area *sync,
572 uint64_t *maxslots)
573 {
574 return test_memslot_move_prepare(data, sync, maxslots, true);
575 }
576
577 static bool test_memslot_move_prepare_inactive(struct vm_data *data,
578 struct sync_area *sync,
579 uint64_t *maxslots)
580 {
581 return test_memslot_move_prepare(data, sync, maxslots, false);
582 }
583
584 static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
585 {
586 uint64_t movesrcgpa;
587
588 movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
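/*
 * Test memslots are numbered from 1 (slot 0 holds the VM's own memory),
 * so the region ID of the last test slot is (data->nslots - 1) + 1.
 */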
589 vm_mem_region_move(data->vm, data->nslots - 1 + 1,
590 MEM_TEST_MOVE_GPA_DEST);
591 vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
592 }
593
594 static void test_memslot_do_unmap(struct vm_data *data,
595 uint64_t offsp, uint64_t count)
596 {
597 uint64_t gpa, ctr;
598
599 for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
600 uint64_t npages;
601 void *hva;
602 int ret;
603
604 hva = vm_gpa2hva(data, gpa, &npages);
605 TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
606 npages = min(npages, count - ctr);
607 ret = madvise(hva, npages * 4096, MADV_DONTNEED);
608 TEST_ASSERT(!ret,
609 "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
610 hva, gpa);
611 ctr += npages;
612 gpa += npages * 4096;
613 }
614 TEST_ASSERT(ctr == count,
615 "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
616 }
617
618 static void test_memslot_map_unmap_check(struct vm_data *data,
619 uint64_t offsp, uint64_t valexp)
620 {
621 uint64_t gpa;
622 uint64_t *val;
623
624 if (!map_unmap_verify)
625 return;
626
627 gpa = MEM_TEST_GPA + offsp * 4096;
628 val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
629 TEST_ASSERT(*val == valexp,
630 "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
631 *val, valexp, gpa);
632 *val = 0;
633 }
634
635 static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
636 {
637 /*
638 * Unmap the second half of the test area while guest writes to (maps)
639 * the first half.
640 */
641 test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
642 MEM_TEST_MAP_SIZE_PAGES / 2);
643
644 /*
645 * Wait for the guest to finish writing the first half of the test
646 * area, verify the written value on the first and the last page of
647 * this area and then unmap it.
648 * Meanwhile, the guest is writing to (mapping) the second half of
649 * the test area.
650 */
651 host_perform_sync(sync);
652 test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
653 test_memslot_map_unmap_check(data,
654 MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
655 MEM_TEST_VAL_1);
656 test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);
657
658
659 /*
660 * Wait for the guest to finish writing the second half of the test
661 * area and verify the written value on the first and the last page
662 * of this area.
663 * The area will be unmapped at the beginning of the next loop
664 * iteration.
665 * Meanwhile, the guest is writing to (mapping) the first half of
666 * the test area.
667 */
668 host_perform_sync(sync);
669 test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
670 MEM_TEST_VAL_2);
671 test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
672 MEM_TEST_VAL_2);
673 }
674
675 static void test_memslot_unmap_loop_common(struct vm_data *data,
676 struct sync_area *sync,
677 uint64_t chunk)
678 {
679 uint64_t ctr;
680
681 /*
682 * Wait for the guest to finish mapping page(s) in the first half
683 * of the test area, verify the written value and then perform unmap
684 * of this area.
685 * Meanwhile, the guest is writing to (mapping) page(s) in the second
686 * half of the test area.
687 */
688 host_perform_sync(sync);
689 test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
690 for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
691 test_memslot_do_unmap(data, ctr, chunk);
692
693 /* Likewise, but for the opposite host / guest areas */
694 host_perform_sync(sync);
695 test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
696 MEM_TEST_VAL_2);
697 for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
698 ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
699 test_memslot_do_unmap(data, ctr, chunk);
700 }
701
702 static void test_memslot_unmap_loop(struct vm_data *data,
703 struct sync_area *sync)
704 {
705 test_memslot_unmap_loop_common(data, sync, 1);
706 }
707
708 static void test_memslot_unmap_loop_chunked(struct vm_data *data,
709 struct sync_area *sync)
710 {
711 test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
712 }
713
714 static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
715 {
716 uint64_t gptr;
717
718 for (gptr = MEM_TEST_GPA + 4096 / 2;
719 gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
720 *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
721
722 host_perform_sync(sync);
723
724 for (gptr = MEM_TEST_GPA;
725 gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
726 uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
727 uint64_t val = *vptr;
728
729 TEST_ASSERT(val == MEM_TEST_VAL_1,
730 "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
731 val, gptr);
732 *vptr = 0;
733 }
734
735 host_perform_sync(sync);
736 }
737
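/*
 * Per-test descriptor: mem_size is in pages (0 selects the default
 * MEM_SIZE_PAGES, see test_execute()), prepare() may veto a run and
 * suggest a memslot cap via maxslots, and loop() performs one host-side
 * benchmark iteration.
 */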
738 struct test_data {
739 const char *name;
740 uint64_t mem_size;
741 void (*guest_code)(void);
742 bool (*prepare)(struct vm_data *data, struct sync_area *sync,
743 uint64_t *maxslots);
744 void (*loop)(struct vm_data *data, struct sync_area *sync);
745 };
746
747 static bool test_execute(int nslots, uint64_t *maxslots,
748 unsigned int maxtime,
749 const struct test_data *tdata,
750 uint64_t *nloops,
751 struct timespec *slot_runtime,
752 struct timespec *guest_runtime)
753 {
754 uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
755 struct vm_data *data;
756 struct sync_area *sync;
757 struct timespec tstart;
758 bool ret = true;
759
760 data = alloc_vm();
761 if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
762 mem_size, slot_runtime)) {
763 ret = false;
764 goto exit_free;
765 }
766
767 sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
768
769 if (tdata->prepare &&
770 !tdata->prepare(data, sync, maxslots)) {
771 ret = false;
772 goto exit_free;
773 }
774
775 launch_vm(data);
776
777 clock_gettime(CLOCK_MONOTONIC, &tstart);
778 let_guest_run(sync);
779
780 while (1) {
781 *guest_runtime = timespec_elapsed(tstart);
782 if (guest_runtime->tv_sec >= maxtime)
783 break;
784
785 tdata->loop(data, sync);
786
787 (*nloops)++;
788 }
789
790 make_guest_exit(sync);
791 wait_guest_exit(data);
792
793 exit_free:
794 free_vm(data);
795
796 return ret;
797 }
798
799 static const struct test_data tests[] = {
800 {
801 .name = "map",
802 .mem_size = MEM_SIZE_MAP_PAGES,
803 .guest_code = guest_code_test_memslot_map,
804 .loop = test_memslot_map_loop,
805 },
806 {
807 .name = "unmap",
808 .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
809 .guest_code = guest_code_test_memslot_unmap,
810 .loop = test_memslot_unmap_loop,
811 },
812 {
813 .name = "unmap chunked",
814 .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
815 .guest_code = guest_code_test_memslot_unmap,
816 .loop = test_memslot_unmap_loop_chunked,
817 },
818 {
819 .name = "move active area",
820 .guest_code = guest_code_test_memslot_move,
821 .prepare = test_memslot_move_prepare_active,
822 .loop = test_memslot_move_loop,
823 },
824 {
825 .name = "move inactive area",
826 .guest_code = guest_code_test_memslot_move,
827 .prepare = test_memslot_move_prepare_inactive,
828 .loop = test_memslot_move_loop,
829 },
830 {
831 .name = "RW",
832 .guest_code = guest_code_test_memslot_rw,
833 .loop = test_memslot_rw_loop
834 },
835 };
836
837 #define NTESTS ARRAY_SIZE(tests)
838
839 struct test_args {
840 int tfirst;
841 int tlast;
842 int nslots;
843 int seconds;
844 int runs;
845 };
846
847 static void help(char *name, struct test_args *targs)
848 {
849 int ctr;
850
851 pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
852 name);
853 pr_info(" -h: print this help screen.\n");
854 pr_info(" -v: enable verbose mode (not for benchmarking).\n");
855 pr_info(" -d: enable extra debug checks.\n");
856 pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
857 targs->nslots);
858 pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
859 targs->tfirst, NTESTS - 1);
860 pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
861 targs->tlast, NTESTS - 1);
862 pr_info(" -l: specify the test length in seconds (currently: %i)\n",
863 targs->seconds);
864 pr_info(" -r: specify the number of runs per test (currently: %i)\n",
865 targs->runs);
866
867 pr_info("\nAvailable tests:\n");
868 for (ctr = 0; ctr < NTESTS; ctr++)
869 pr_info("%d: %s\n", ctr, tests[ctr].name);
870 }
871
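/*
 * Example invocation (binary name shown for illustration only):
 *
 *   ./memslot_perf_test -s 4096 -f 0 -e 2 -l 10 -r 3
 *
 * runs tests 0..2 with the memslot count capped at 4096, for 10 seconds
 * and 3 runs per test.
 */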
872 static bool parse_args(int argc, char *argv[],
873 struct test_args *targs)
874 {
875 int opt;
876
877 while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
878 switch (opt) {
879 case 'h':
880 default:
881 help(argv[0], targs);
882 return false;
883 case 'v':
884 verbose = true;
885 break;
886 case 'd':
887 map_unmap_verify = true;
888 break;
889 case 's':
890 targs->nslots = atoi(optarg);
891 if (targs->nslots <= 0 && targs->nslots != -1) {
892 pr_info("Slot count cap has to be positive or -1 for no cap\n");
893 return false;
894 }
895 break;
896 case 'f':
897 targs->tfirst = atoi(optarg);
898 if (targs->tfirst < 0) {
899 pr_info("First test to run has to be non-negative\n");
900 return false;
901 }
902 break;
903 case 'e':
904 targs->tlast = atoi(optarg);
905 if (targs->tlast < 0 || targs->tlast >= NTESTS) {
906 pr_info("Last test to run has to be non-negative and less than %zu\n",
907 NTESTS);
908 return false;
909 }
910 break;
911 case 'l':
912 targs->seconds = atoi(optarg);
913 if (targs->seconds < 0) {
914 pr_info("Test length in seconds has to be non-negative\n");
915 return false;
916 }
917 break;
918 case 'r':
919 targs->runs = atoi(optarg);
920 if (targs->runs <= 0) {
921 pr_info("Runs per test has to be positive\n");
922 return false;
923 }
924 break;
925 }
926 }
927
928 if (optind < argc) {
929 help(argv[0], targs);
930 return false;
931 }
932
933 if (targs->tfirst > targs->tlast) {
934 pr_info("First test to run cannot be greater than the last test to run\n");
935 return false;
936 }
937
938 return true;
939 }
940
941 struct test_result {
942 struct timespec slot_runtime, guest_runtime, iter_runtime;
943 int64_t slottimens, runtimens;
944 uint64_t nloops;
945 };
946
947 static bool test_loop(const struct test_data *data,
948 const struct test_args *targs,
949 struct test_result *rbestslottime,
950 struct test_result *rbestruntime)
951 {
952 uint64_t maxslots;
953 struct test_result result;
954
955 result.nloops = 0;
956 if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
957 &result.nloops,
958 &result.slot_runtime, &result.guest_runtime)) {
959 if (maxslots)
960 pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
961 maxslots);
962 else
963 pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
964
965 return false;
966 }
967
968 pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
969 result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
970 result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
971 if (!result.nloops) {
972 pr_info("No full loops done - too short test time or system too loaded?\n");
973 return true;
974 }
975
976 result.iter_runtime = timespec_div(result.guest_runtime,
977 result.nloops);
978 pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
979 result.nloops,
980 result.iter_runtime.tv_sec,
981 result.iter_runtime.tv_nsec);
982 result.slottimens = timespec_to_ns(result.slot_runtime);
983 result.runtimens = timespec_to_ns(result.iter_runtime);
984
985 /*
986 * Only rank the slot setup time for tests using the whole test memory
987 * area so they are comparable
988 */
989 if (!data->mem_size &&
990 (!rbestslottime->slottimens ||
991 result.slottimens < rbestslottime->slottimens))
992 *rbestslottime = result;
993 if (!rbestruntime->runtimens ||
994 result.runtimens < rbestruntime->runtimens)
995 *rbestruntime = result;
996
997 return true;
998 }
999
1000 int main(int argc, char *argv[])
1001 {
1002 struct test_args targs = {
1003 .tfirst = 0,
1004 .tlast = NTESTS - 1,
1005 .nslots = -1,
1006 .seconds = 5,
1007 .runs = 1,
1008 };
1009 struct test_result rbestslottime;
1010 int tctr;
1011
1012 /* Tell stdout not to buffer its content */
1013 setbuf(stdout, NULL);
1014
1015 if (!parse_args(argc, argv, &targs))
1016 return -1;
1017
1018 rbestslottime.slottimens = 0;
1019 for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
1020 const struct test_data *data = &tests[tctr];
1021 unsigned int runctr;
1022 struct test_result rbestruntime;
1023
1024 if (tctr > targs.tfirst)
1025 pr_info("\n");
1026
1027 pr_info("Testing %s performance with %i runs, %d seconds each\n",
1028 data->name, targs.runs, targs.seconds);
1029
1030 rbestruntime.runtimens = 0;
1031 for (runctr = 0; runctr < targs.runs; runctr++)
1032 if (!test_loop(data, &targs,
1033 &rbestslottime, &rbestruntime))
1034 break;
1035
1036 if (rbestruntime.runtimens)
1037 pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
1038 rbestruntime.iter_runtime.tv_sec,
1039 rbestruntime.iter_runtime.tv_nsec,
1040 rbestruntime.nloops);
1041 }
1042
1043 if (rbestslottime.slottimens)
1044 pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
1045 rbestslottime.slot_runtime.tv_sec,
1046 rbestslottime.slot_runtime.tv_nsec);
1047
1048 return 0;
1049 }
1050