// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2023 Google LLC
 * Author: Mostafa Saleh <smostafa@google.com>
 */

#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/pkvm.h>
#include <nvhe/pviommu-host.h>

#include <kvm/arm_hypercalls.h>
#include <kvm/device.h>

struct pkvm_device *registered_devices;
unsigned long registered_devices_nr;

/*
 * This lock protects all devices in registered_devices when ctxt changes.
 * That is coarser locking than strictly necessary and could be improved,
 * but the device context only changes at boot time and at teardown, so in
 * theory there shouldn't be contention on that path.
 * All changes/checks to MMIO state or the IOMMU must be atomic with the
 * ctxt of the device.
 */
static DEFINE_HYP_SPINLOCK(device_spinlock);

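/*
 * Take ownership of the device array populated by the host: convert it to
 * a hyp VA and donate the backing pages to the hypervisor so the host can
 * no longer modify it.
 */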
int pkvm_init_devices(void)
{
	size_t dev_sz;
	int ret;

	if (!registered_devices_nr)
		return 0;

	registered_devices = kern_hyp_va(registered_devices);
	dev_sz = PAGE_ALIGN(size_mul(sizeof(struct pkvm_device),
				     registered_devices_nr));

	ret = __pkvm_host_donate_hyp(hyp_virt_to_phys(registered_devices) >> PAGE_SHIFT,
				     dev_sz >> PAGE_SHIFT);
	if (ret)
		registered_devices_nr = 0;
	return ret;
}

/* Return the device matching a resource; addr and size must match exactly. */
static struct pkvm_device *pkvm_get_device(u64 addr, size_t size)
{
	struct pkvm_device *dev;
	struct pkvm_dev_resource *res;
	int i, j;

	for (i = 0 ; i < registered_devices_nr ; ++i) {
		dev = &registered_devices[i];
		for (j = 0 ; j < dev->nr_resources; ++j) {
			res = &dev->resources[j];
			if ((addr == res->base) && (size == res->size))
				return dev;
		}
	}

	return NULL;
}

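/* Return the device owning the resource that contains @addr, if any. */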
static struct pkvm_device *pkvm_get_device_by_addr(u64 addr)
{
	struct pkvm_device *dev;
	struct pkvm_dev_resource *res;
	int i, j;

	for (i = 0 ; i < registered_devices_nr ; ++i) {
		dev = &registered_devices[i];
		for (j = 0 ; j < dev->nr_resources; ++j) {
			res = &dev->resources[j];
			if ((addr >= res->base) && (addr < res->base + res->size))
				return dev;
		}
	}

	return NULL;
}

/*
 * Devices assigned to a guest have to transition to the hypervisor first;
 * this guarantees that there is a point in time when the device is
 * accessible from neither the host nor the guest, so the hypervisor
 * can reset it and block its IOMMU.
 * The host will donate the whole device to the hypervisor before the
 * guest touches or requests any part of the device, and upon the first
 * request or access the hypervisor will ensure that the device has been
 * fully donated.
 */
int pkvm_device_hyp_assign_mmio(u64 pfn, u64 nr_pages)
{
	struct pkvm_device *dev;
	int ret;
	size_t size = nr_pages << PAGE_SHIFT;
	u64 phys = pfn << PAGE_SHIFT;

	dev = pkvm_get_device(phys, size);
	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);
	/* A VM already has this device, no take-backs. */
	if (dev->ctxt || dev->refcount) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = ___pkvm_host_donate_hyp_prot(pfn, nr_pages, true, PAGE_HYP_DEVICE);
	/* Hyp has a device mapping, while the host may have issued cacheable writes. */
	if (!ret)
		kvm_flush_dcache_to_poc(__hyp_va(phys), PAGE_SIZE);

out_unlock:
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

/*
 * Reclaim of MMIO can happen in two cases:
 * - The VM is dying; in that case MMIO is eagerly reclaimed to the host
 *   from the VM teardown context without host intervention.
 * - The VM was not launched or died before claiming the device, so it is
 *   still considered a host device, but the MMIO was already donated to
 *   the hypervisor in preparation for the VM to access it. In that case
 *   the host will use this function from an HVC to reclaim the MMIO from
 *   the KVM/VFIO file release context or in case of failure at
 *   initialization.
 */
int pkvm_device_reclaim_mmio(u64 pfn, u64 nr_pages)
{
	struct pkvm_device *dev;
	int ret;
	size_t size = nr_pages << PAGE_SHIFT;
	u64 phys = pfn << PAGE_SHIFT;

	dev = pkvm_get_device(phys, size);
	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);
	if (dev->ctxt) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = __pkvm_hyp_donate_host(pfn, nr_pages);

out_unlock:
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

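/*
 * Reset a device and block DMA on all of its IOMMU endpoints so that it
 * can safely change ownership. @host_to_guest gives the direction of the
 * transition and is forwarded to the registered reset handler.
 */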
static int pkvm_device_reset(struct pkvm_device *dev, bool host_to_guest)
{
	struct pkvm_dev_iommu *iommu;
	int ret;
	int i;

	hyp_assert_lock_held(&device_spinlock);

	/* Reset is mandatory. */
	if (!dev->reset_handler)
		return -ENODEV;

	ret = dev->reset_handler(dev->cookie, host_to_guest);
	if (ret)
		return ret;

	for (i = 0 ; i < dev->nr_iommus ; ++i) {
		iommu = &dev->iommus[i];
		ret = kvm_iommu_dev_block_dma(iommu->id, iommu->endpoint, host_to_guest);
		if (WARN_ON(ret))
			return ret;
	}
	return 0;
}

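/*
 * Assign a single device to @vm: check that every MMIO resource has
 * already been donated to the hypervisor, reset the device and record
 * @vm as the new owner.
 */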
static int __pkvm_device_assign(struct pkvm_device *dev, struct pkvm_hyp_vm *vm)
{
	int i;
	struct pkvm_dev_resource *res;
	int ret;

	hyp_assert_lock_held(&device_spinlock);

	for (i = 0 ; i < dev->nr_resources; ++i) {
		res = &dev->resources[i];
		ret = hyp_check_range_owned(res->base, res->size);
		if (ret)
			return ret;
	}

	ret = pkvm_device_reset(dev, true);
	if (ret)
		return ret;

	dev->ctxt = vm;
	return 0;
}

/*
 * Atomically check that the whole group is assigned to the hypervisor
 * and tag the devices in the group as owned by the VM.
 * This can't race with reclaim as it's protected by device_spinlock.
 */
static int __pkvm_group_assign(u32 group_id, struct pkvm_hyp_vm *vm)
{
	int i;
	int ret = 0;

	hyp_assert_lock_held(&device_spinlock);

	for (i = 0 ; i < registered_devices_nr ; ++i) {
		struct pkvm_device *dev = &registered_devices[i];

		if (dev->group_id != group_id)
			continue;
		if (dev->ctxt || dev->refcount) {
			ret = -EPERM;
			break;
		}
		ret = __pkvm_device_assign(dev, vm);
		if (ret)
			break;
	}

	if (ret) {
		while (i--) {
			struct pkvm_device *dev = &registered_devices[i];

			if (dev->group_id == group_id)
				dev->ctxt = NULL;
		}
	}
	return ret;
}

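/*
 * Map a device MMIO page into a guest on behalf of the host. On the first
 * mapping, the whole device group is moved from the hypervisor to the
 * guest; after that, only the owning VM may map more of the device.
 */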
int pkvm_host_map_guest_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 pfn, u64 gfn)
{
	int ret = 0;
	struct pkvm_device *dev = pkvm_get_device_by_addr(hyp_pfn_to_phys(pfn));
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);

	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);

	if (dev->ctxt == NULL) {
		/*
		 * First time the device is assigned to a guest, make sure the whole
		 * group is assigned to the hypervisor.
		 */
		ret = __pkvm_group_assign(dev->group_id, vm);
	} else if (dev->ctxt != vm) {
		ret = -EBUSY;
	}

	if (ret)
		goto out_ret;

	ret = __pkvm_install_guest_mmio(hyp_vcpu, pfn, gfn);

out_ret:
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

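/*
 * Guest HVC handler: resolve the token (physical address) backing a guest
 * IPA and check that it lies within an MMIO resource of a device assigned
 * to this VM. Returns false to replay the exit as a hypervisor request
 * when the IPA is not mapped yet, true once an SMCCC reply has been set.
 */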
bool pkvm_device_request_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
{
	int i, j, ret;
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	u64 ipa = smccc_get_arg1(vcpu);
	u64 token;
	s8 level;

	/* args 2..6 reserved for future use. */
	if (smccc_get_arg2(vcpu) || smccc_get_arg3(vcpu) || smccc_get_arg4(vcpu) ||
	    smccc_get_arg5(vcpu) || smccc_get_arg6(vcpu) || !PAGE_ALIGNED(ipa))
		goto out_inval;

	ret = pkvm_get_guest_pa_request(hyp_vcpu, ipa, PAGE_SIZE,
					&token, &level);
	if (ret == -ENOENT) {
		/* Repeat next time. */
		write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
		*exit_code = ARM_EXCEPTION_HYP_REQ;
		return false;
	} else if (ret) {
		goto out_inval;
	}

	/* It's expected that the address is mapped as a page for MMIO. */
	WARN_ON(level != KVM_PGTABLE_LAST_LEVEL);

	hyp_spin_lock(&device_spinlock);
	for (i = 0 ; i < registered_devices_nr ; ++i) {
		struct pkvm_device *dev = &registered_devices[i];

		if (dev->ctxt != vm)
			continue;

		for (j = 0 ; j < dev->nr_resources; ++j) {
			struct pkvm_dev_resource *res = &dev->resources[j];

			if ((token >= res->base) && (token + PAGE_SIZE <= res->base + res->size)) {
				smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, token, 0, 0);
				goto out_ret;
			}
		}
	}

	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
out_ret:
	hyp_spin_unlock(&device_spinlock);
	return true;
out_inval:
	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
	return true;
}

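/* Return all of a device's MMIO resources to the host stage-2. */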
static void pkvm_devices_reclaim_device(struct pkvm_device *dev)
{
	int i;

	for (i = 0 ; i < dev->nr_resources ; ++i) {
		struct pkvm_dev_resource *res = &dev->resources[i];

		hyp_spin_lock(&host_mmu.lock);
		WARN_ON(host_stage2_set_owner_locked(res->base, res->size, PKVM_ID_HOST));
		hyp_spin_unlock(&host_mmu.lock);
	}
}

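/*
 * VM teardown: reset every device owned by the dying VM, clear its
 * ownership and hand the MMIO back to the host.
 */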
void pkvm_devices_teardown(struct pkvm_hyp_vm *vm)
{
	int i;

	hyp_spin_lock(&device_spinlock);
	for (i = 0 ; i < registered_devices_nr ; ++i) {
		struct pkvm_device *dev = &registered_devices[i];

		if (dev->ctxt != vm)
			continue;
		WARN_ON(pkvm_device_reset(dev, false));
		dev->ctxt = NULL;
		pkvm_devices_reclaim_device(dev);
	}
	hyp_spin_unlock(&device_spinlock);
}

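/* Return the device behind endpoint @endpoint_id of IOMMU @id, if any. */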
static struct pkvm_device *pkvm_get_device_by_iommu(u64 id, u32 endpoint_id)
{
	struct pkvm_device *dev = NULL;
	struct pkvm_dev_iommu *iommu;
	int i, j;

	for (i = 0 ; i < registered_devices_nr ; ++i) {
		dev = &registered_devices[i];
		for (j = 0 ; j < dev->nr_iommus; ++j) {
			iommu = &dev->iommus[j];
			if ((id == iommu->id) && (endpoint_id == iommu->endpoint))
				return dev;
		}
	}

	return NULL;
}

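/*
 * Take a reference on the device behind (@iommu_id, @endpoint_id) if it is
 * owned by @vm. Endpoints that are not registered here are not tracked,
 * so the call succeeds as a no-op for them.
 */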
int pkvm_devices_get_context(u64 iommu_id, u32 endpoint_id, struct pkvm_hyp_vm *vm)
{
	struct pkvm_device *dev = pkvm_get_device_by_iommu(iommu_id, endpoint_id);
	int ret = 0;

	if (!dev)
		return 0;

	hyp_spin_lock(&device_spinlock);
	if (dev->ctxt != vm)
		ret = -EPERM;
	else
		hyp_refcount_inc(dev->refcount);
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

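/* Drop the reference taken by pkvm_devices_get_context(). */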
void pkvm_devices_put_context(u64 iommu_id, u32 endpoint_id)
{
	struct pkvm_device *dev = pkvm_get_device_by_iommu(iommu_id, endpoint_id);

	if (!dev)
		return;

	hyp_spin_lock(&device_spinlock);
	hyp_refcount_dec(dev->refcount);
	hyp_spin_unlock(&device_spinlock);
}

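/*
 * Register the mandatory reset handler for the device containing @phys.
 * Only one handler may be registered per device; later attempts fail
 * with -EBUSY.
 */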
int pkvm_device_register_reset(u64 phys, void *cookie,
			       int (*cb)(void *cookie, bool host_to_guest))
{
	struct pkvm_device *dev;
	int ret = 0;

	dev = pkvm_get_device_by_addr(phys);
	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);
	if (!dev->reset_handler) {
		dev->reset_handler = cb;
		dev->cookie = cookie;
	} else {
		ret = -EBUSY;
	}
	hyp_spin_unlock(&device_spinlock);

	return ret;
}

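/*
 * Guest HVC handler: resolve a guest (pvIOMMU, vSID) route into the
 * physical IOMMU token and SID, assigning the device's group to the VM on
 * first use.
 */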
bool pkvm_device_request_dma(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
{
	int ret;
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
	u64 pviommu = smccc_get_arg1(vcpu);
	u64 vsid = smccc_get_arg2(vcpu);
	u64 token1, token2;
	struct pviommu_route route;
	struct pkvm_device *dev;

	if (smccc_get_arg3(vcpu) || smccc_get_arg4(vcpu) || smccc_get_arg5(vcpu) ||
	    smccc_get_arg6(vcpu))
		goto out_ret;

	ret = pkvm_pviommu_get_route(vm, pviommu, vsid, &route);
	if (ret)
		goto out_ret;
	token2 = route.sid;
	/*
	 * route.iommu is the host-hyp IOMMU ID, which has no meaning for the
	 * guest. It needs to be converted to the IOMMU token as described in
	 * the firmware (usually the base MMIO address).
	 */
	ret = kvm_iommu_id_to_token(route.iommu, &token1);
	if (ret)
		goto out_ret;

	dev = pkvm_get_device_by_iommu(route.iommu, route.sid);
	if (!dev)
		goto out_ret;

	hyp_spin_lock(&device_spinlock);
	if (dev->ctxt == NULL) {
		/*
		 * First time the device is assigned to a guest, make sure its
		 * resources have been donated.
		 */
		ret = __pkvm_group_assign(dev->group_id, vm);
	} else if (dev->ctxt != vm) {
		ret = -EPERM;
	}
	hyp_spin_unlock(&device_spinlock);
	if (ret)
		goto out_ret;

	smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, token1, token2, 0);
	return true;
out_ret:
	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
	return true;
}