1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
4 */
5
6 #include <linux/anon_inodes.h>
7 #include <linux/file.h>
8 #include <linux/gunyah.h>
9 #include <linux/interrupt.h>
10 #include <linux/kref.h>
11 #include <linux/mm.h>
12 #include <linux/module.h>
13 #include <linux/types.h>
14 #include <linux/wait.h>
15
16 #include "vm_mgr.h"
17
18 #include <uapi/linux/gunyah.h>
19 #include <trace/hooks/gunyah.h>
20
21 #define MAX_VCPU_NAME 20 /* gh-vcpu:strlen(U32::MAX)+NUL */
22
vcpu_release(struct kref * kref)23 static void vcpu_release(struct kref *kref)
24 {
25 struct gunyah_vcpu *vcpu = container_of(kref, struct gunyah_vcpu, kref);
26
27 free_page((unsigned long)vcpu->vcpu_run);
28 kfree(vcpu);
29 }
30
31 /*
32 * When hypervisor allows us to schedule vCPU again, it gives us an interrupt
33 */
gunyah_vcpu_irq_handler(int irq,void * data)34 static irqreturn_t gunyah_vcpu_irq_handler(int irq, void *data)
35 {
36 struct gunyah_vcpu *vcpu = data;
37
38 complete(&vcpu->ready);
39 return IRQ_HANDLED;
40 }
41
gunyah_handle_page_fault(struct gunyah_vcpu * vcpu,const struct gunyah_hypercall_vcpu_run_resp * vcpu_run_resp)42 static bool gunyah_handle_page_fault(
43 struct gunyah_vcpu *vcpu,
44 const struct gunyah_hypercall_vcpu_run_resp *vcpu_run_resp)
45 {
46 u64 addr = vcpu_run_resp->state_data[0];
47 bool write = !!vcpu_run_resp->state_data[1];
48 int ret = 0;
49
50 ret = gunyah_demand_page(vcpu->ghvm, addr, write);
51 if (!ret || ret == -EAGAIN)
52 return true;
53
54 vcpu->vcpu_run->page_fault.resume_action = GUNYAH_VCPU_RESUME_FAULT;
55 vcpu->vcpu_run->page_fault.attempt = ret;
56 vcpu->vcpu_run->page_fault.phys_addr = addr;
57 vcpu->vcpu_run->exit_reason = GUNYAH_VCPU_EXIT_PAGE_FAULT;
58 return false;
59 }
60
/*
 * gunyah_handle_mmio() - Process an MMIO exit from the hypervisor.
 * @vcpu: vCPU that took the exit
 * @resume_data: hypercall resume words to fill in when resuming in-kernel
 * @vcpu_run_resp: state_data[0]=guest address, [1]=access length in bytes,
 *                 [2]=data (for writes)
 *
 * Returns true if the access was handled in-kernel and the vCPU can be
 * resumed immediately (resume_data filled in); false if the exit must be
 * forwarded to userspace via the shared vcpu->vcpu_run page.
 */
static bool
gunyah_handle_mmio(struct gunyah_vcpu *vcpu, unsigned long resume_data[3],
		   const struct gunyah_hypercall_vcpu_run_resp *vcpu_run_resp)
{
	u64 addr = vcpu_run_resp->state_data[0],
	    len = vcpu_run_resp->state_data[1],
	    data = vcpu_run_resp->state_data[2];
	int ret;

	/* Hypervisor should never report an access wider than 8 bytes */
	if (WARN_ON(len > sizeof(u64)))
		len = sizeof(u64);

	/* First see if demand paging can back the address */
	ret = gunyah_demand_page(vcpu->ghvm, addr,
				 vcpu->vcpu_run->mmio.is_write);
	if (!ret || ret == -EAGAIN) {
		resume_data[1] = GUNYAH_ADDRSPACE_VMMIO_ACTION_RETRY;
		return true;
	}

	if (vcpu_run_resp->state == GUNYAH_VCPU_ADDRSPACE_VMMIO_READ) {
		vcpu->vcpu_run->mmio.is_write = 0;
		/* Record that we need to give vCPU user's supplied value next gunyah_vcpu_run() */
		vcpu->state = GUNYAH_VCPU_RUN_STATE_MMIO_READ;
		vcpu->mmio_read_len = len;
	} else { /* GUNYAH_VCPU_ADDRSPACE_VMMIO_WRITE */
		/* Writes may hit an in-kernel io handler; try that first */
		if (!gunyah_vm_mmio_write(vcpu->ghvm, addr, len, data)) {
			resume_data[0] = GUNYAH_ADDRSPACE_VMMIO_ACTION_EMULATE;
			return true;
		}
		vcpu->vcpu_run->mmio.is_write = 1;
		memcpy(vcpu->vcpu_run->mmio.data, &data, len);
		vcpu->state = GUNYAH_VCPU_RUN_STATE_MMIO_WRITE;
	}

	/* Assume userspace is okay and handles the access due to existing userspace */
	vcpu->vcpu_run->mmio.resume_action = GUNYAH_VCPU_RESUME_HANDLED;
	vcpu->mmio_addr = vcpu->vcpu_run->mmio.phys_addr = addr;
	vcpu->vcpu_run->mmio.len = len;
	vcpu->vcpu_run->exit_reason = GUNYAH_VCPU_EXIT_MMIO;

	return false;
}
103
/*
 * gunyah_handle_mmio_resume() - Translate userspace's MMIO resume action
 * into the resume_data words passed to the next vcpu_run hypercall.
 *
 * For a completed MMIO read, also copies the value userspace supplied in
 * vcpu->vcpu_run->mmio.data into resume_data[0].
 *
 * Return: 0 on success, -EINVAL if userspace set an unknown resume_action.
 */
static int gunyah_handle_mmio_resume(struct gunyah_vcpu *vcpu,
				     unsigned long resume_data[3])
{
	switch (vcpu->vcpu_run->mmio.resume_action) {
	case GUNYAH_VCPU_RESUME_HANDLED:
		if (vcpu->state == GUNYAH_VCPU_RUN_STATE_MMIO_READ) {
			/* Clamp: at most sizeof(unsigned long) bytes fit */
			if (unlikely(vcpu->mmio_read_len >
				     sizeof(resume_data[0])))
				vcpu->mmio_read_len = sizeof(resume_data[0]);
			memcpy(&resume_data[0], vcpu->vcpu_run->mmio.data,
			       vcpu->mmio_read_len);
		}
		resume_data[1] = GUNYAH_ADDRSPACE_VMMIO_ACTION_EMULATE;
		break;
	case GUNYAH_VCPU_RESUME_FAULT:
		resume_data[1] = GUNYAH_ADDRSPACE_VMMIO_ACTION_FAULT;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
127
gunyah_vcpu_rm_notification(struct notifier_block * nb,unsigned long action,void * data)128 static int gunyah_vcpu_rm_notification(struct notifier_block *nb,
129 unsigned long action, void *data)
130 {
131 struct gunyah_vcpu *vcpu = container_of(nb, struct gunyah_vcpu, nb);
132 struct gunyah_rm_vm_exited_payload *exit_payload = data;
133
134 /* Wake up userspace waiting for the vCPU to be runnable again */
135 if (action == GUNYAH_RM_NOTIFICATION_VM_EXITED &&
136 le16_to_cpu(exit_payload->vmid) == vcpu->ghvm->vmid)
137 complete(&vcpu->ready);
138
139 return NOTIFY_OK;
140 }
141
142 static inline enum gunyah_vm_status
remap_vm_status(enum gunyah_rm_vm_status rm_status)143 remap_vm_status(enum gunyah_rm_vm_status rm_status)
144 {
145 switch (rm_status) {
146 case GUNYAH_RM_VM_STATUS_INIT_FAILED:
147 return GUNYAH_VM_STATUS_LOAD_FAILED;
148 case GUNYAH_RM_VM_STATUS_EXITED:
149 return GUNYAH_VM_STATUS_EXITED;
150 default:
151 return GUNYAH_VM_STATUS_CRASHED;
152 }
153 }
154
/**
 * gunyah_vcpu_check_system() - Check whether the VM as a whole is running
 * @vcpu: Pointer to gunyah_vcpu
 *
 * Returns true if the VM is alive.
 * Returns false if the VM is not alive (it can only be shutting down); in
 * that case the vcpu_run page is filled with an EXIT_STATUS exit and the
 * vCPU is parked in GUNYAH_VCPU_RUN_STATE_SYSTEM_DOWN.
 */
static bool gunyah_vcpu_check_system(struct gunyah_vcpu *vcpu)
	__must_hold(&vcpu->run_lock)
{
	bool ret = true;

	down_read(&vcpu->ghvm->status_lock);
	if (likely(vcpu->ghvm->vm_status == GUNYAH_RM_VM_STATUS_RUNNING))
		goto out;

	/* Report the (remapped) VM status to userspace via the shared page */
	vcpu->vcpu_run->status.status = remap_vm_status(vcpu->ghvm->vm_status);
	vcpu->vcpu_run->status.exit_info = vcpu->ghvm->exit_info;
	vcpu->vcpu_run->exit_reason = GUNYAH_VCPU_EXIT_STATUS;
	vcpu->state = GUNYAH_VCPU_RUN_STATE_SYSTEM_DOWN;
	ret = false;
out:
	up_read(&vcpu->ghvm->status_lock);
	return ret;
}
180
/*
 * We must have interrupts enabled when making the hypercall to switch to
 * guest vcpu, else guest vcpu runs until end of hypervisor scheduling time
 * slice and also increases interrupt latency. Native vtime accounting
 * requires that interrupts are disabled, so we can't do accounting.
 */
#if IS_ENABLED(CONFIG_TICK_CPU_ACCOUNTING)
/* Account subsequent CPU time to the guest (tick-based accounting only) */
static inline void gh_guest_accounting_enter(void)
{
	vtime_account_guest_enter();
}

/* Stop accounting CPU time to the guest */
static inline void gh_guest_accounting_exit(void)
{
	vtime_account_guest_exit();
}
#else /* !CONFIG_TICK_CPU_ACCOUNTING */
static inline void gh_guest_accounting_enter(void) { }
static inline void gh_guest_accounting_exit(void) { }
#endif /* CONFIG_TICK_CPU_ACCOUNTING */
201
/**
 * gunyah_vcpu_run() - Request Gunyah to begin scheduling this vCPU.
 * @vcpu: The client descriptor that was obtained via gunyah_vcpu_alloc()
 *
 * Drives the run loop for one vCPU: first completes any state left pending
 * by the previous call (e.g. an MMIO access userspace just emulated), then
 * repeatedly resumes the vCPU via hypercall, handling in-kernel exits
 * (demand paging, registered MMIO handlers) and returning to userspace for
 * everything else. The exit description is published through the shared
 * vcpu->vcpu_run page.
 *
 * Return: 0 on a userspace-visible exit, -ERESTARTSYS on signal, -ENODEV if
 * the vCPU is unbound, -EPERM if called from the wrong mm, -EINTR on
 * immediate_exit, or a remapped Gunyah error.
 */
static int gunyah_vcpu_run(struct gunyah_vcpu *vcpu)
{
	struct gunyah_hypercall_vcpu_run_resp vcpu_run_resp;
	unsigned long resume_data[3] = { 0 };
	enum gunyah_error gunyah_error;
	int ret = 0;
	u32 vcpu_id;

	/* Function instance already unbound: nothing to run */
	if (!vcpu->f)
		return -ENODEV;

	if (mutex_lock_interruptible(&vcpu->run_lock))
		return -ERESTARTSYS;

	/* No Gunyah resource bound yet (gunyah_vcpu_populate not run) */
	if (!vcpu->rsc) {
		ret = -ENODEV;
		goto out;
	}

	vcpu_id = vcpu->ticket.label;
	/* Finish whatever the previous gunyah_vcpu_run() left pending */
	switch (vcpu->state) {
	case GUNYAH_VCPU_RUN_STATE_UNKNOWN:
		if (vcpu->ghvm->vm_status != GUNYAH_RM_VM_STATUS_RUNNING) {
			/*
			 * Check if VM is up. If VM is starting, will block
			 * until VM is fully up since that thread does
			 * down_write.
			 */
			if (!gunyah_vcpu_check_system(vcpu))
				goto out;
		}
		vcpu->state = GUNYAH_VCPU_RUN_STATE_READY;
		break;
	case GUNYAH_VCPU_RUN_STATE_MMIO_READ:
	case GUNYAH_VCPU_RUN_STATE_MMIO_WRITE:
		/* Userspace finished emulating the MMIO access */
		ret = gunyah_handle_mmio_resume(vcpu, resume_data);
		if (ret)
			goto out;
		vcpu->state = GUNYAH_VCPU_RUN_STATE_READY;
		break;
	case GUNYAH_VCPU_RUN_STATE_SYSTEM_DOWN:
		/* VM is gone; vcpu_run already describes the STATUS exit */
		goto out;
	default:
		break;
	}

	/* Only the process owning the VM's memory may run its vCPUs */
	if (current->mm != vcpu->ghvm->mm_s) {
		ret = -EPERM;
		goto out;
	}

	while (!ret && !signal_pending(current)) {
		if (vcpu->vcpu_run->immediate_exit) {
			ret = -EINTR;
			goto out;
		}

		trace_android_rvh_gh_before_vcpu_run(vcpu->ghvm->vmid, vcpu_id);
		gh_guest_accounting_enter();
		gunyah_error = gunyah_hypercall_vcpu_run(
			vcpu->rsc->capid, resume_data, &vcpu_run_resp);
		gh_guest_accounting_exit();
		trace_android_rvh_gh_after_vcpu_run(vcpu->ghvm->vmid,
			vcpu_id, gunyah_error,
			(const struct gunyah_hypercall_vcpu_run_resp *)&vcpu_run_resp);

		if (gunyah_error == GUNYAH_ERROR_OK) {
			/* resume_data was consumed by the hypercall */
			memset(resume_data, 0, sizeof(resume_data));
			switch (vcpu_run_resp.state) {
			case GUNYAH_VCPU_STATE_READY:
				if (need_resched())
					schedule();
				break;
			case GUNYAH_VCPU_STATE_POWERED_OFF:
				fallthrough;
			case GUNYAH_VCPU_STATE_SYSTEM_OFF:
				/*
				 * vcpu might be off because the VM is shut down
				 * If so, it won't ever run again
				 */
				if (!gunyah_vcpu_check_system(vcpu))
					goto out;
				/*
				 * Otherwise, another vcpu will turn it on (e.g.
				 * by PSCI) and hyp sends an interrupt to wake
				 * Linux up.
				 */
				fallthrough;
			case GUNYAH_VCPU_STATE_EXPECTS_WAKEUP:
				ret = wait_for_completion_interruptible(
					&vcpu->ready);
				/*
				 * reinitialize completion before next
				 * hypercall. If we reinitialize after the
				 * hypercall, interrupt may have already come
				 * before re-initializing the completion and
				 * then end up waiting for event that already
				 * happened.
				 */
				reinit_completion(&vcpu->ready);
				/*
				 * Check VM status again. Completion
				 * might've come from VM exiting
				 */
				if (!ret && !gunyah_vcpu_check_system(vcpu))
					goto out;
				break;
			case GUNYAH_VCPU_STATE_BLOCKED:
				schedule();
				break;
			case GUNYAH_VCPU_ADDRSPACE_VMMIO_READ:
			case GUNYAH_VCPU_ADDRSPACE_VMMIO_WRITE:
				/* false => exit forwarded to userspace */
				if (!gunyah_handle_mmio(vcpu, resume_data,
							&vcpu_run_resp))
					goto out;
				break;
			case GUNYAH_VCPU_ADDRSPACE_PAGE_FAULT:
				if (!gunyah_handle_page_fault(vcpu,
							      &vcpu_run_resp))
					goto out;
				break;
			default:
				pr_warn_ratelimited(
					"Unknown vCPU state: %llx\n",
					vcpu_run_resp.sized_state);
				ret = -EINVAL;
				goto out;
			}
		} else if (gunyah_error == GUNYAH_ERROR_RETRY) {
			schedule();
		} else {
			ret = gunyah_error_remap(gunyah_error);
		}
	}

out:
	mutex_unlock(&vcpu->run_lock);

	if (signal_pending(current))
		return -ERESTARTSYS;

	return ret;
}
349
gunyah_vcpu_ioctl(struct file * filp,unsigned int cmd,unsigned long arg)350 static long gunyah_vcpu_ioctl(struct file *filp, unsigned int cmd,
351 unsigned long arg)
352 {
353 struct gunyah_vcpu *vcpu = filp->private_data;
354 long ret = -ENOTTY;
355
356 switch (cmd) {
357 case GUNYAH_VCPU_RUN:
358 ret = gunyah_vcpu_run(vcpu);
359 break;
360 case GUNYAH_VCPU_MMAP_SIZE:
361 ret = PAGE_SIZE;
362 break;
363 default:
364 break;
365 }
366 return ret;
367 }
368
/* fd release: drop the VM reference and the fd's vcpu reference. */
static int gunyah_vcpu_release(struct inode *inode, struct file *filp)
{
	struct gunyah_vcpu *vcpu = filp->private_data;

	trace_android_rvh_gh_vcpu_release(vcpu->ghvm->vmid, vcpu);
	gunyah_vm_put(vcpu->ghvm);
	/* May free the vcpu if the function instance was already unbound */
	kref_put(&vcpu->kref, vcpu_release);
	return 0;
}
378
gunyah_vcpu_fault(struct vm_fault * vmf)379 static vm_fault_t gunyah_vcpu_fault(struct vm_fault *vmf)
380 {
381 struct gunyah_vcpu *vcpu = vmf->vma->vm_file->private_data;
382 struct page *page;
383
384 if (vmf->pgoff)
385 return VM_FAULT_SIGBUS;
386
387 page = virt_to_page(vcpu->vcpu_run);
388 get_page(page);
389 vmf->page = page;
390 return 0;
391 }
392
/* vma ops for the vCPU mmap; pages are populated lazily via fault */
static const struct vm_operations_struct gunyah_vcpu_ops = {
	.fault = gunyah_vcpu_fault,
};
396
/*
 * mmap of the vCPU fd exposes the shared vcpu_run page, populated lazily by
 * gunyah_vcpu_fault(). NOTE(review): vma size/offset is not validated here;
 * accesses beyond page 0 SIGBUS in the fault handler instead.
 */
static int gunyah_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &gunyah_vcpu_ops;
	return 0;
}
402
/* File operations for the per-vCPU anon inode created in gunyah_vcpu_bind() */
static const struct file_operations gunyah_vcpu_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = gunyah_vcpu_ioctl,
	.release = gunyah_vcpu_release,
	.llseek = noop_llseek,
	.mmap = gunyah_vcpu_mmap,
};
410
gunyah_vcpu_populate(struct gunyah_vm_resource_ticket * ticket,struct gunyah_resource * ghrsc)411 static bool gunyah_vcpu_populate(struct gunyah_vm_resource_ticket *ticket,
412 struct gunyah_resource *ghrsc)
413 {
414 struct gunyah_vcpu *vcpu =
415 container_of(ticket, struct gunyah_vcpu, ticket);
416 int ret;
417
418 mutex_lock(&vcpu->run_lock);
419 if (vcpu->rsc) {
420 pr_warn("vcpu%d already got a Gunyah resource. Check if multiple resources with same label were configured.\n",
421 vcpu->ticket.label);
422 ret = -EEXIST;
423 goto out;
424 }
425
426 vcpu->rsc = ghrsc;
427
428 ret = request_irq(vcpu->rsc->irq, gunyah_vcpu_irq_handler,
429 IRQF_TRIGGER_RISING, "gunyah_vcpu", vcpu);
430 if (ret) {
431 pr_warn("Failed to request vcpu irq %d: %d", vcpu->rsc->irq,
432 ret);
433 goto out;
434 }
435
436 enable_irq_wake(vcpu->rsc->irq);
437
438 out:
439 mutex_unlock(&vcpu->run_lock);
440 return !ret;
441 }
442
/*
 * gunyah_vcpu_unpopulate() - Release the Gunyah vCPU resource.
 *
 * A thread may currently be inside gunyah_vcpu_run() holding run_lock, so
 * before blocking on the lock: set immediate_exit so the run loop bails out,
 * and complete &ready in case it is waiting for the wake-up interrupt.
 */
static void gunyah_vcpu_unpopulate(struct gunyah_vm_resource_ticket *ticket,
				   struct gunyah_resource *ghrsc)
{
	struct gunyah_vcpu *vcpu =
		container_of(ticket, struct gunyah_vcpu, ticket);

	vcpu->vcpu_run->immediate_exit = true;
	complete_all(&vcpu->ready);
	mutex_lock(&vcpu->run_lock);
	free_irq(vcpu->rsc->irq, vcpu);
	vcpu->rsc = NULL;
	mutex_unlock(&vcpu->run_lock);
}
456
/*
 * gunyah_vcpu_bind() - Instantiate the vCPU VM function.
 * @f: function instance; f->argp holds a struct gunyah_fn_vcpu_arg (the id)
 *
 * Allocates the vcpu, its shared vcpu_run page, registers a resource ticket
 * (filled in later by gunyah_vcpu_populate()), registers an RM exit
 * notifier, and hands userspace an anon-inode fd.
 *
 * Return: the new fd on success, negative errno on failure.
 */
static long gunyah_vcpu_bind(struct gunyah_vm_function_instance *f)
{
	struct gunyah_fn_vcpu_arg *arg = f->argp;
	struct gunyah_vcpu *vcpu;
	char name[MAX_VCPU_NAME];
	struct file *file;
	struct page *page;
	int fd;
	long r;

	if (f->arg_size != sizeof(*arg))
		return -EINVAL;

	vcpu = kzalloc(sizeof(*vcpu), GFP_KERNEL);
	if (!vcpu)
		return -ENOMEM;

	vcpu->f = f;
	f->data = vcpu;
	mutex_init(&vcpu->run_lock);
	kref_init(&vcpu->kref);
	init_completion(&vcpu->ready);

	/* One page shared with userspace via mmap for run/exit state */
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto err_destroy_vcpu;
	}
	vcpu->vcpu_run = page_address(page);

	vcpu->ticket.resource_type = GUNYAH_RESOURCE_TYPE_VCPU;
	vcpu->ticket.label = arg->id;
	vcpu->ticket.owner = THIS_MODULE;
	vcpu->ticket.populate = gunyah_vcpu_populate;
	vcpu->ticket.unpopulate = gunyah_vcpu_unpopulate;

	r = gunyah_vm_add_resource_ticket(f->ghvm, &vcpu->ticket);
	if (r)
		goto err_destroy_page;

	/* Hold the VM while the fd exists; dropped in gunyah_vcpu_release() */
	if (!gunyah_vm_get(f->ghvm)) {
		r = -ENODEV;
		goto err_remove_resource_ticket;
	}
	vcpu->ghvm = f->ghvm;

	vcpu->nb.notifier_call = gunyah_vcpu_rm_notification;
	/*
	 * Ensure we run after the vm_mgr handles the notification and does
	 * any necessary state changes.
	 */
	vcpu->nb.priority = -1;
	r = gunyah_rm_notifier_register(f->rm, &vcpu->nb);
	if (r)
		goto err_put_gunyah_vm;

	/* Second reference for the fd; first belongs to the function instance */
	kref_get(&vcpu->kref);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		r = fd;
		goto err_notifier;
	}

	snprintf(name, sizeof(name), "gh-vcpu:%u", vcpu->ticket.label);
	file = anon_inode_getfile(name, &gunyah_vcpu_fops, vcpu, O_RDWR);
	if (IS_ERR(file)) {
		r = PTR_ERR(file);
		goto err_put_fd;
	}

	fd_install(fd, file);

	return fd;
err_put_fd:
	put_unused_fd(fd);
err_notifier:
	gunyah_rm_notifier_unregister(f->rm, &vcpu->nb);
err_put_gunyah_vm:
	gunyah_vm_put(vcpu->ghvm);
err_remove_resource_ticket:
	gunyah_vm_remove_resource_ticket(f->ghvm, &vcpu->ticket);
err_destroy_page:
	free_page((unsigned long)vcpu->vcpu_run);
err_destroy_vcpu:
	/* No other reference holders on the error paths; free directly */
	kfree(vcpu);
	return r;
}
545
/*
 * gunyah_vcpu_unbind() - Tear down the function-instance side of the vcpu.
 * The userspace fd may still hold its own reference; the vcpu is freed only
 * when the last kref drops.
 */
static void gunyah_vcpu_unbind(struct gunyah_vm_function_instance *f)
{
	struct gunyah_vcpu *vcpu = f->data;

	gunyah_rm_notifier_unregister(f->rm, &vcpu->nb);
	gunyah_vm_remove_resource_ticket(vcpu->ghvm, &vcpu->ticket);
	/* gunyah_vcpu_run() now fails with -ENODEV */
	vcpu->f = NULL;

	kref_put(&vcpu->kref, vcpu_release);
}
556
gunyah_vcpu_compare(const struct gunyah_vm_function_instance * f,const void * arg,size_t size)557 static bool gunyah_vcpu_compare(const struct gunyah_vm_function_instance *f,
558 const void *arg, size_t size)
559 {
560 const struct gunyah_fn_vcpu_arg *instance = f->argp, *other = arg;
561
562 if (sizeof(*other) != size)
563 return false;
564
565 return instance->id == other->id;
566 }
567
/* Register the "vcpu" VM function (GUNYAH_FN_VCPU) with the VM manager */
DECLARE_GUNYAH_VM_FUNCTION_INIT(vcpu, GUNYAH_FN_VCPU, 1, gunyah_vcpu_bind,
				gunyah_vcpu_unbind, gunyah_vcpu_compare);
MODULE_DESCRIPTION("Gunyah vCPU Function");
MODULE_LICENSE("GPL");
572