// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2023 Google LLC
 * Author: Mostafa Saleh <smostafa@google.com>
 */

#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/pkvm.h>
#include <nvhe/pviommu-host.h>

#include <kvm/arm_hypercalls.h>
#include <kvm/device.h>

struct pkvm_device *registered_devices;
unsigned long registered_devices_nr;

/*
 * This lock protects all devices in registered_devices when ctxt changes.
 * That is coarser locking than strictly necessary and can be improved;
 * however, the device context only changes at boot time and at teardown,
 * so in theory there shouldn't be contention on that path.
 * All changes/checks to MMIO state or the IOMMU must be atomic with the
 * ctxt of the device.
 */
static DEFINE_HYP_SPINLOCK(device_spinlock);

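/*
 * Fix up the hypervisor VA of the registered device array and donate its
 * backing pages from the host to the hypervisor, so the host can no longer
 * tamper with the device descriptions. If the donation fails, no devices
 * are registered.
 */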
int pkvm_init_devices(void)
{
	size_t dev_sz;
	int ret;

	if (!registered_devices_nr)
		return 0;

	registered_devices = kern_hyp_va(registered_devices);
	dev_sz = PAGE_ALIGN(size_mul(sizeof(struct pkvm_device),
				     registered_devices_nr));

	ret = __pkvm_host_donate_hyp(hyp_virt_to_phys(registered_devices) >> PAGE_SHIFT,
				     dev_sz >> PAGE_SHIFT);
	if (ret)
		registered_devices_nr = 0;
	return ret;
}

/* Return the device owning a resource; addr and size must match exactly. */
static struct pkvm_device *pkvm_get_device(u64 addr, size_t size)
{
	struct pkvm_device *dev;
	struct pkvm_dev_resource *res;
	int i, j;

	for (i = 0; i < registered_devices_nr; ++i) {
		dev = &registered_devices[i];
		for (j = 0; j < dev->nr_resources; ++j) {
			res = &dev->resources[j];
			if ((addr == res->base) && (size == res->size))
				return dev;
		}
	}

	return NULL;
}

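/* Like pkvm_get_device(), but matches any address inside a resource range. */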
static struct pkvm_device *pkvm_get_device_by_addr(u64 addr)
{
	struct pkvm_device *dev;
	struct pkvm_dev_resource *res;
	int i, j;

	for (i = 0; i < registered_devices_nr; ++i) {
		dev = &registered_devices[i];
		for (j = 0; j < dev->nr_resources; ++j) {
			res = &dev->resources[j];
			if ((addr >= res->base) && (addr < res->base + res->size))
				return dev;
		}
	}

	return NULL;
}

/*
 * Devices assigned to a guest have to transition to the hypervisor first;
 * this guarantees that there is a point in time when the device is
 * accessible from neither the host nor the guest, so the hypervisor
 * can reset it and block its IOMMU.
 * The host donates the whole device to the hypervisor before the guest
 * touches or requests any part of it, and upon the first request or
 * access the hypervisor ensures that the device has been fully donated.
 */
int pkvm_device_hyp_assign_mmio(u64 pfn, u64 nr_pages)
{
	struct pkvm_device *dev;
	int ret;
	size_t size = nr_pages << PAGE_SHIFT;
	u64 phys = pfn << PAGE_SHIFT;

	dev = pkvm_get_device(phys, size);
	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);
	/* A VM already has this device; no take-backs. */
	if (dev->ctxt || dev->refcount) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = ___pkvm_host_donate_hyp_prot(pfn, nr_pages, true, PAGE_HYP_DEVICE);
	/* Hyp has a device mapping, while the host may have issued cacheable writes. */
	if (!ret)
		kvm_flush_dcache_to_poc(__hyp_va(phys), size);

out_unlock:
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

/*
 * Reclaim of MMIO can happen in two cases:
 * - The VM is dying; in that case the MMIO is eagerly reclaimed to the host
 *   from the VM teardown context without host intervention.
 * - The VM was not launched, or it died before claiming the device, so the
 *   device is still considered a host device, but the MMIO was already
 *   donated to the hypervisor in preparation for the VM to access it. In
 *   that case the host uses this function from an HVC to reclaim the MMIO
 *   from the KVM/VFIO file release context, or in case of failure at
 *   initialization.
 */
int pkvm_device_reclaim_mmio(u64 pfn, u64 nr_pages)
{
	struct pkvm_device *dev;
	int ret;
	size_t size = nr_pages << PAGE_SHIFT;
	u64 phys = pfn << PAGE_SHIFT;

	dev = pkvm_get_device(phys, size);
	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);
	if (dev->ctxt) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = __pkvm_hyp_donate_host(pfn, nr_pages);

out_unlock:
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

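/*
 * Reset the device through its registered handler and update DMA blocking
 * for all of its IOMMU endpoints to match the host/guest transition.
 * Called with device_spinlock held.
 */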
static int pkvm_device_reset(struct pkvm_device *dev, bool host_to_guest)
{
	struct pkvm_dev_iommu *iommu;
	int ret;
	int i;

	hyp_assert_lock_held(&device_spinlock);

	/* Reset is mandatory. */
	if (!dev->reset_handler)
		return -ENODEV;

	ret = dev->reset_handler(dev->cookie, host_to_guest);
	if (ret)
		return ret;

	for (i = 0; i < dev->nr_iommus; ++i) {
		iommu = &dev->iommus[i];
		ret = kvm_iommu_dev_block_dma(iommu->id, iommu->endpoint, host_to_guest);
		if (WARN_ON(ret))
			return ret;
	}
	return 0;
}

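/*
 * Check that every resource of the device has been donated to the
 * hypervisor, reset it for the host-to-guest transition and record @vm as
 * its context. Must be called with device_spinlock held.
 */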
static int __pkvm_device_assign(struct pkvm_device *dev, struct pkvm_hyp_vm *vm)
{
	int i;
	struct pkvm_dev_resource *res;
	int ret;

	hyp_assert_lock_held(&device_spinlock);

	for (i = 0; i < dev->nr_resources; ++i) {
		res = &dev->resources[i];
		ret = hyp_check_range_owned(res->base, res->size);
		if (ret)
			return ret;
	}

	ret = pkvm_device_reset(dev, true);
	if (ret)
		return ret;

	dev->ctxt = vm;
	return 0;
}

/*
 * Atomically check that the whole group is assigned to the hypervisor
 * and tag the devices in the group as owned by the VM.
 * This can't race with reclaim as it's protected by device_spinlock.
 */
static int __pkvm_group_assign(u32 group_id, struct pkvm_hyp_vm *vm)
{
	int i;
	int ret = 0;

	hyp_assert_lock_held(&device_spinlock);

	for (i = 0; i < registered_devices_nr; ++i) {
		struct pkvm_device *dev = &registered_devices[i];

		if (dev->group_id != group_id)
			continue;
		if (dev->ctxt || dev->refcount) {
			ret = -EPERM;
			break;
		}
		ret = __pkvm_device_assign(dev, vm);
		if (ret)
			break;
	}

	if (ret) {
		while (i--) {
			struct pkvm_device *dev = &registered_devices[i];

			if (dev->group_id == group_id)
				dev->ctxt = NULL;
		}
	}
	return ret;
}

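/*
 * Map a device MMIO page into a guest. On the first mapping for a device
 * the whole group is atomically claimed for the VM; afterwards only the
 * owning VM may map more of the device's MMIO.
 */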
int pkvm_host_map_guest_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 pfn, u64 gfn)
{
	int ret = 0;
	struct pkvm_device *dev = pkvm_get_device_by_addr(hyp_pfn_to_phys(pfn));
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);

	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);

	if (dev->ctxt == NULL) {
		/*
		 * The first time the device is assigned to a guest, make sure
		 * the whole group is assigned to the hypervisor.
		 */
		ret = __pkvm_group_assign(dev->group_id, vm);
	} else if (dev->ctxt != vm) {
		ret = -EBUSY;
	}

	if (ret)
		goto out_ret;

	ret = __pkvm_install_guest_mmio(hyp_vcpu, pfn, gfn);

out_ret:
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

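/*
 * Guest HVC handler: resolve a page-aligned IPA to its backing physical
 * address (token) and return it, but only if the page falls entirely within
 * a resource of a device owned by the calling VM. If the IPA can't be
 * resolved yet, the PC is rewound and the exit is forwarded as a hypervisor
 * request so that the guest retries the HVC.
 */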
bool pkvm_device_request_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
{
	int i, j, ret;
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	u64 ipa = smccc_get_arg1(vcpu);
	u64 token;
	s8 level;

	/* args 2..6 reserved for future use. */
	if (smccc_get_arg2(vcpu) || smccc_get_arg3(vcpu) || smccc_get_arg4(vcpu) ||
	    smccc_get_arg5(vcpu) || smccc_get_arg6(vcpu) || !PAGE_ALIGNED(ipa))
		goto out_inval;

	ret = pkvm_get_guest_pa_request(hyp_vcpu, ipa, PAGE_SIZE,
					&token, &level);
	if (ret == -ENOENT) {
		/* Rewind the PC so the HVC is retried. */
		write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
		*exit_code = ARM_EXCEPTION_HYP_REQ;
		return false;
	} else if (ret) {
		goto out_inval;
	}

	/* The address is expected to be mapped as a page for MMIO. */
	WARN_ON(level != KVM_PGTABLE_LAST_LEVEL);

	hyp_spin_lock(&device_spinlock);
	for (i = 0; i < registered_devices_nr; ++i) {
		struct pkvm_device *dev = &registered_devices[i];

		if (dev->ctxt != vm)
			continue;

		for (j = 0; j < dev->nr_resources; ++j) {
			struct pkvm_dev_resource *res = &dev->resources[j];

			if ((token >= res->base) && (token + PAGE_SIZE <= res->base + res->size)) {
				smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, token, 0, 0);
				goto out_ret;
			}
		}
	}

	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
out_ret:
	hyp_spin_unlock(&device_spinlock);
	return true;
out_inval:
	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
	return true;
}

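/* Hand every MMIO resource of the device back to the host stage-2. */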
static void pkvm_devices_reclaim_device(struct pkvm_device *dev)
{
	int i;

	for (i = 0; i < dev->nr_resources; ++i) {
		struct pkvm_dev_resource *res = &dev->resources[i];

		hyp_spin_lock(&host_mmu.lock);
		WARN_ON(host_stage2_set_owner_locked(res->base, res->size, PKVM_ID_HOST));
		hyp_spin_unlock(&host_mmu.lock);
	}
}

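/*
 * VM teardown: reset every device owned by the dying VM, clear its context
 * and return the device's MMIO to the host.
 */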
void pkvm_devices_teardown(struct pkvm_hyp_vm *vm)
{
	int i;

	hyp_spin_lock(&device_spinlock);
	for (i = 0; i < registered_devices_nr; ++i) {
		struct pkvm_device *dev = &registered_devices[i];

		if (dev->ctxt != vm)
			continue;
		WARN_ON(pkvm_device_reset(dev, false));
		dev->ctxt = NULL;
		pkvm_devices_reclaim_device(dev);
	}
	hyp_spin_unlock(&device_spinlock);
}

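/* Return the device that owns the given IOMMU endpoint, if any. */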
static struct pkvm_device *pkvm_get_device_by_iommu(u64 id, u32 endpoint_id)
{
	struct pkvm_device *dev = NULL;
	struct pkvm_dev_iommu *iommu;
	int i, j;

	for (i = 0; i < registered_devices_nr; ++i) {
		dev = &registered_devices[i];
		for (j = 0; j < dev->nr_iommus; ++j) {
			iommu = &dev->iommus[j];
			if ((id == iommu->id) && (endpoint_id == iommu->endpoint))
				return dev;
		}
	}

	return NULL;
}

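/*
 * Take a reference on the device behind an IOMMU endpoint on behalf of @vm.
 * Fails if the device is owned by a different context; endpoints that don't
 * belong to a registered device are not tracked and always succeed.
 */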
int pkvm_devices_get_context(u64 iommu_id, u32 endpoint_id, struct pkvm_hyp_vm *vm)
{
	struct pkvm_device *dev = pkvm_get_device_by_iommu(iommu_id, endpoint_id);
	int ret = 0;

	if (!dev)
		return 0;

	hyp_spin_lock(&device_spinlock);
	if (dev->ctxt != vm)
		ret = -EPERM;
	else
		hyp_refcount_inc(dev->refcount);
	hyp_spin_unlock(&device_spinlock);
	return ret;
}

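/* Drop a reference taken by pkvm_devices_get_context(). */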
void pkvm_devices_put_context(u64 iommu_id, u32 endpoint_id)
{
	struct pkvm_device *dev = pkvm_get_device_by_iommu(iommu_id, endpoint_id);

	if (!dev)
		return;

	hyp_spin_lock(&device_spinlock);
	hyp_refcount_dec(dev->refcount);
	hyp_spin_unlock(&device_spinlock);
}

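/*
 * Register the reset handler for the device containing @phys. A device can
 * only have a single reset handler, and assignment to a guest is refused
 * until one is registered.
 */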
int pkvm_device_register_reset(u64 phys, void *cookie,
			       int (*cb)(void *cookie, bool host_to_guest))
{
	struct pkvm_device *dev;
	int ret = 0;

	dev = pkvm_get_device_by_addr(phys);
	if (!dev)
		return -ENODEV;

	hyp_spin_lock(&device_spinlock);
	if (!dev->reset_handler) {
		dev->reset_handler = cb;
		dev->cookie = cookie;
	} else {
		ret = -EBUSY;
	}
	hyp_spin_unlock(&device_spinlock);

	return ret;
}

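/*
 * Guest HVC handler: translate a (pvIOMMU, vSID) pair chosen by the guest
 * into the physical IOMMU token and SID, claiming the device's group for
 * the VM on first use. On success the pair is returned to the guest.
 */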
bool pkvm_device_request_dma(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
{
	int ret;
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
	u64 pviommu = smccc_get_arg1(vcpu);
	u64 vsid = smccc_get_arg2(vcpu);
	u64 token1, token2;
	struct pviommu_route route;
	struct pkvm_device *dev;

	if (smccc_get_arg3(vcpu) || smccc_get_arg4(vcpu) || smccc_get_arg5(vcpu) ||
	    smccc_get_arg6(vcpu))
		goto out_ret;

	ret = pkvm_pviommu_get_route(vm, pviommu, vsid, &route);
	if (ret)
		goto out_ret;
	token2 = route.sid;
	/*
	 * route.iommu is the host-hyp IOMMU ID, which has no meaning for the
	 * guest. It needs to be converted to the IOMMU token as known from
	 * the firmware (usually the base MMIO address).
	 */
	ret = kvm_iommu_id_to_token(route.iommu, &token1);
	if (ret)
		goto out_ret;

	dev = pkvm_get_device_by_iommu(route.iommu, route.sid);
	if (!dev)
		goto out_ret;

	hyp_spin_lock(&device_spinlock);
	if (dev->ctxt == NULL) {
		/*
		 * The first time the device is assigned to a guest, make sure
		 * its resources have been donated.
		 */
		ret = __pkvm_group_assign(dev->group_id, vm);
	} else if (dev->ctxt != vm) {
		ret = -EPERM;
	}
	hyp_spin_unlock(&device_spinlock);
	if (ret)
		goto out_ret;

	smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, token1, token2, 0);
	return true;
out_ret:
	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
	return true;
}