• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * KVM Microsoft Hyper-V emulation
3  *
4  * derived from arch/x86/kvm/x86.c
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright (C) 2008 Qumranet, Inc.
8  * Copyright IBM Corporation, 2008
9  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
10  * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
11  *
12  * Authors:
13  *   Avi Kivity   <avi@qumranet.com>
14  *   Yaniv Kamay  <yaniv@qumranet.com>
15  *   Amit Shah    <amit.shah@qumranet.com>
16  *   Ben-Ami Yassour <benami@il.ibm.com>
17  *   Andrey Smetanin <asmetanin@virtuozzo.com>
18  *
19  * This work is licensed under the terms of the GNU GPL, version 2.  See
20  * the COPYING file in the top-level directory.
21  *
22  */
23 
24 #include "x86.h"
25 #include "lapic.h"
26 #include "ioapic.h"
27 #include "hyperv.h"
28 
29 #include <linux/kvm_host.h>
30 #include <linux/highmem.h>
31 #include <linux/sched/cputime.h>
32 #include <linux/eventfd.h>
33 
34 #include <asm/apicdef.h>
35 #include <trace/events/kvm.h>
36 
37 #include "trace.h"
38 
synic_read_sint(struct kvm_vcpu_hv_synic * synic,int sint)39 static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
40 {
41 	return atomic64_read(&synic->sint[sint]);
42 }
43 
/*
 * Decode the vector field of a raw SINT value.
 *
 * Returns -1 if HV_SYNIC_SINT_MASKED is set in @sint_value, otherwise the
 * bits selected by HV_SYNIC_SINT_VECTOR_MASK.
 */
static inline int synic_get_sint_vector(u64 sint_value)
{
	bool masked = sint_value & HV_SYNIC_SINT_MASKED;

	return masked ? -1 : (int)(sint_value & HV_SYNIC_SINT_VECTOR_MASK);
}
50 
synic_has_vector_connected(struct kvm_vcpu_hv_synic * synic,int vector)51 static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
52 				      int vector)
53 {
54 	int i;
55 
56 	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
57 		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
58 			return true;
59 	}
60 	return false;
61 }
62 
synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic * synic,int vector)63 static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
64 				     int vector)
65 {
66 	int i;
67 	u64 sint_value;
68 
69 	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
70 		sint_value = synic_read_sint(synic, i);
71 		if (synic_get_sint_vector(sint_value) == vector &&
72 		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
73 			return true;
74 	}
75 	return false;
76 }
77 
synic_update_vector(struct kvm_vcpu_hv_synic * synic,int vector)78 static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
79 				int vector)
80 {
81 	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
82 		return;
83 
84 	if (synic_has_vector_connected(synic, vector))
85 		__set_bit(vector, synic->vec_bitmap);
86 	else
87 		__clear_bit(vector, synic->vec_bitmap);
88 
89 	if (synic_has_vector_auto_eoi(synic, vector))
90 		__set_bit(vector, synic->auto_eoi_bitmap);
91 	else
92 		__clear_bit(vector, synic->auto_eoi_bitmap);
93 }
94 
/*
 * Program SINT register @sint of @synic with @data.
 *
 * Returns 1 when a guest write supplies an unmasked vector below
 * HV_SYNIC_FIRST_VALID_VECTOR, 0 otherwise.
 */
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int new_vec = data & HV_SYNIC_SINT_VECTOR_MASK;
	bool masked = data & HV_SYNIC_SINT_MASKED;
	int prev_vec;

	/*
	 * Only vectors 16-255 are valid.  Nested Hyper-V, however, writes
	 * the default '0x10000' value at boot and must not get a #GP for
	 * it, and the host must be able to zero-initialise the register.
	 */
	if (!host && !masked && new_vec < HV_SYNIC_FIRST_VALID_VECTOR)
		return 1;

	/*
	 * Several SINTs may share one vector, so synic keeps a bitmap of
	 * the vectors it handles and a bitmap of the vectors with auto-EOI
	 * behaviour.  Both are refreshed here and queried atomically on
	 * the fast paths.
	 */
	prev_vec = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;

	atomic64_set(&synic->sint[sint], data);

	/* Refresh the bits of the vector we left and the one we now use. */
	synic_update_vector(synic, prev_vec);
	synic_update_vector(synic, new_vec);

	/* Have the SynIC vectors loaded into the EOI exit bitmap. */
	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
	return 0;
}
129 
/*
 * Find the vCPU of @kvm whose Hyper-V vp_index equals @vpidx.
 *
 * Returns NULL if @vpidx is not below KVM_MAX_VCPUS or no vCPU matches.
 */
static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *candidate;
	int n;

	if (vpidx >= KVM_MAX_VCPUS)
		return NULL;

	/* First try the vCPU stored at position @vpidx. */
	candidate = kvm_get_vcpu(kvm, vpidx);
	if (candidate && vcpu_to_hv_vcpu(candidate)->vp_index == vpidx)
		return candidate;

	/* Otherwise scan every vCPU for a matching vp_index. */
	kvm_for_each_vcpu(n, candidate, kvm) {
		if (vcpu_to_hv_vcpu(candidate)->vp_index == vpidx)
			return candidate;
	}
	return NULL;
}
146 
/*
 * Return the SynIC of the vCPU with vp_index @vpidx, or NULL if there is
 * no such vCPU or its SynIC is not active.
 */
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu = get_vcpu_by_vpidx(kvm, vpidx);
	struct kvm_vcpu_hv_synic *synic;

	if (!vcpu)
		return NULL;

	synic = vcpu_to_synic(vcpu);
	if (!synic->active)
		return NULL;
	return synic;
}
158 
/*
 * Clear the msg_pending flag in the header of message slot @sint inside
 * the guest message page of @synic. Logs an error and does nothing else
 * if the page cannot be obtained.
 */
static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
					 u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	gpa_t gpa = synic->msg_page & PAGE_MASK;
	struct hv_message_page *mapped;
	struct page *page;

	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page)) {
		vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
			 gpa);
		return;
	}

	mapped = kmap_atomic(page);
	mapped->sint_message[sint].header.message_flags.msg_pending = 0;
	kunmap_atomic(mapped);

	/* The page was written: release it dirty and log it as dirty. */
	kvm_release_page_dirty(page);
	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
}
184 
/*
 * Handle acknowledgement of SINT @sint on @vcpu: clear the slot's pending
 * flag when the message page is enabled, re-queue synthetic timers that
 * still have a message pending for this SINT, and notify the GSI mapped
 * to the SINT, if any.
 */
static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	struct kvm *kvm = vcpu->kvm;
	bool requeue = false;
	int i, srcu_idx, gsi;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
		synic_clear_sint_msg_pending(synic, sint);

	/* Try to deliver pending Hyper-V SynIC timer messages. */
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) {
		struct kvm_vcpu_hv_stimer *t = &hv_vcpu->stimer[i];

		if (!t->msg_pending)
			continue;
		if (!(t->config & HV_STIMER_ENABLE))
			continue;
		if (HV_STIMER_SINT(t->config) != sint)
			continue;

		set_bit(t->index, hv_vcpu->stimer_pending_bitmap);
		requeue = true;
	}
	if (requeue)
		kvm_make_request(KVM_REQ_HV_STIMER, vcpu);

	/* sint_to_gsi holds -1 when no GSI is mapped to this SINT. */
	srcu_idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, srcu_idx);
}
219 
/*
 * Fill the vCPU's Hyper-V exit record with a KVM_EXIT_HYPERV_SYNIC entry
 * describing @msr plus the current control, event page and message page
 * values, then raise KVM_REQ_HV_EXIT on the vCPU.
 */
static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;

	hv->exit.type = KVM_EXIT_HYPERV_SYNIC;
	hv->exit.u.synic.msr = msr;
	hv->exit.u.synic.msg_page = synic->msg_page;
	hv->exit.u.synic.evt_page = synic->evt_page;
	hv->exit.u.synic.control = synic->control;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}
233 
/*
 * Handle a write of @data to SynIC MSR @msr.
 *
 * @host distinguishes host-initiated writes from guest writes.  Guest
 * writes are rejected while the SynIC is inactive.
 *
 * Returns 0 on success and 1 on failure (inactive SynIC, guest write to
 * SVERSION, failure to clear a guest page, rejected SINT value or
 * unknown MSR).
 */
static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
			 u32 msr, u64 data, bool host)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int i;

	if (!host && !synic->active)
		return 1;

	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);

	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		break;
	case HV_X64_MSR_SVERSION:
		/* Only the host may set the version. */
		if (!host)
			return 1;
		synic->version = data;
		return 0;
	case HV_X64_MSR_SIEFP:
		/*
		 * A guest enabling the page gets it zeroed first, unless
		 * dont_zero_synic_pages is set.
		 */
		if (!host && (data & HV_SYNIC_SIEFP_ENABLE) &&
		    !synic->dont_zero_synic_pages &&
		    kvm_clear_guest(vcpu->kvm, data & PAGE_MASK, PAGE_SIZE))
			return 1;
		synic->evt_page = data;
		break;
	case HV_X64_MSR_SIMP:
		/* Same zeroing rule as for the event page above. */
		if (!host && (data & HV_SYNIC_SIMP_ENABLE) &&
		    !synic->dont_zero_synic_pages &&
		    kvm_clear_guest(vcpu->kvm, data & PAGE_MASK, PAGE_SIZE))
			return 1;
		synic->msg_page = data;
		break;
	case HV_X64_MSR_EOM:
		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		return 0;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data,
				      host);
	default:
		return 1;
	}

	/* Guest writes to SCONTROL, SIEFP and SIMP queue a SynIC exit. */
	if (!host)
		synic_exit(synic, msr);
	return 0;
}
299 
/*
 * Handle a read of SynIC MSR @msr, storing the value in @pdata.
 *
 * Returns 0 on success, 1 if the SynIC is inactive for a guest read or
 * @msr is not a SynIC MSR handled here. EOM always reads as zero.
 */
static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
			 bool host)
{
	if (!host && !synic->active)
		return 1;

	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		return 0;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		return 0;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		return 0;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		return 0;
	case HV_X64_MSR_EOM:
		*pdata = 0;
		return 0;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		return 0;
	default:
		return 1;
	}
}
334 
/*
 * Inject the interrupt configured for SINT @sint into the vCPU that owns
 * @synic.
 *
 * Returns -EINVAL if @sint is out of range, -ENOENT if the SINT is
 * masked, otherwise the result of kvm_irq_delivery_to_apic().
 */
static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int vec, rc;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	vec = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vec < 0)
		return -ENOENT;

	/* Fixed-mode, physical-destination interrupt with self shorthand. */
	memset(&irq, 0, sizeof(irq));
	irq.vector = vec;
	irq.level = 1;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.shorthand = APIC_DEST_SELF;

	rc = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, rc);
	return rc;
}
359 
/*
 * Raise SINT @sint on the vCPU with vp_index @vpidx.
 *
 * Returns -EINVAL if no active SynIC is found for @vpidx, otherwise the
 * result of synic_set_irq().
 */
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
	struct kvm_vcpu_hv_synic *synic = synic_get(kvm, vpidx);

	return synic ? synic_set_irq(synic, sint) : -EINVAL;
}
370 
kvm_hv_synic_send_eoi(struct kvm_vcpu * vcpu,int vector)371 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
372 {
373 	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
374 	int i;
375 
376 	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
377 
378 	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
379 		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
380 			kvm_hv_notify_acked_sint(vcpu, i);
381 }
382 
/*
 * Record @gsi as the GSI mapped to SINT @sint of the vCPU with vp_index
 * @vpidx.
 *
 * Returns -EINVAL if no active SynIC is found or @sint is out of range,
 * 0 otherwise.
 */
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
	struct kvm_vcpu_hv_synic *synic = synic_get(kvm, vpidx);

	if (!synic || sint >= ARRAY_SIZE(synic->sint_to_gsi))
		return -EINVAL;

	atomic_set(&synic->sint_to_gsi[sint], gsi);
	return 0;
}
397 
kvm_hv_irq_routing_update(struct kvm * kvm)398 void kvm_hv_irq_routing_update(struct kvm *kvm)
399 {
400 	struct kvm_irq_routing_table *irq_rt;
401 	struct kvm_kernel_irq_routing_entry *e;
402 	u32 gsi;
403 
404 	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
405 					lockdep_is_held(&kvm->irq_lock));
406 
407 	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
408 		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
409 			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
410 				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
411 						    e->hv_sint.sint, gsi);
412 		}
413 	}
414 }
415 
synic_init(struct kvm_vcpu_hv_synic * synic)416 static void synic_init(struct kvm_vcpu_hv_synic *synic)
417 {
418 	int i;
419 
420 	memset(synic, 0, sizeof(*synic));
421 	synic->version = HV_SYNIC_VERSION_1;
422 	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
423 		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
424 		atomic_set(&synic->sint_to_gsi[i], -1);
425 	}
426 }
427 
get_time_ref_counter(struct kvm * kvm)428 static u64 get_time_ref_counter(struct kvm *kvm)
429 {
430 	struct kvm_hv *hv = &kvm->arch.hyperv;
431 	struct kvm_vcpu *vcpu;
432 	u64 tsc;
433 
434 	/*
435 	 * The guest has not set up the TSC page or the clock isn't
436 	 * stable, fall back to get_kvmclock_ns.
437 	 */
438 	if (!hv->tsc_ref.tsc_sequence)
439 		return div_u64(get_kvmclock_ns(kvm), 100);
440 
441 	vcpu = kvm_get_vcpu(kvm, 0);
442 	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
443 	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
444 		+ hv->tsc_ref.tsc_offset;
445 }
446 
/*
 * Flag @stimer in its vCPU's pending bitmap and raise KVM_REQ_HV_STIMER
 * on that vCPU; additionally kick the vCPU when @vcpu_kick is true.
 */
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);

	set_bit(stimer->index, hv_vcpu->stimer_pending_bitmap);
	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);

	if (!vcpu_kick)
		return;
	kvm_vcpu_kick(vcpu);
}
458 
stimer_cleanup(struct kvm_vcpu_hv_stimer * stimer)459 static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
460 {
461 	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
462 
463 	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
464 				    stimer->index);
465 
466 	hrtimer_cancel(&stimer->timer);
467 	clear_bit(stimer->index,
468 		  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
469 	stimer->msg_pending = false;
470 	stimer->exp_time = 0;
471 }
472 
stimer_timer_callback(struct hrtimer * timer)473 static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
474 {
475 	struct kvm_vcpu_hv_stimer *stimer;
476 
477 	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
478 	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
479 				     stimer->index);
480 	stimer_mark_pending(stimer, true);
481 
482 	return HRTIMER_NORESTART;
483 }
484 
485 /*
486  * stimer_start() assumptions:
487  * a) stimer->count is not equal to 0
488  * b) stimer->config has HV_STIMER_ENABLE flag
489  */
stimer_start(struct kvm_vcpu_hv_stimer * stimer)490 static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
491 {
492 	u64 time_now;
493 	ktime_t ktime_now;
494 
495 	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
496 	ktime_now = ktime_get();
497 
498 	if (stimer->config & HV_STIMER_PERIODIC) {
499 		if (stimer->exp_time) {
500 			if (time_now >= stimer->exp_time) {
501 				u64 remainder;
502 
503 				div64_u64_rem(time_now - stimer->exp_time,
504 					      stimer->count, &remainder);
505 				stimer->exp_time =
506 					time_now + (stimer->count - remainder);
507 			}
508 		} else
509 			stimer->exp_time = time_now + stimer->count;
510 
511 		trace_kvm_hv_stimer_start_periodic(
512 					stimer_to_vcpu(stimer)->vcpu_id,
513 					stimer->index,
514 					time_now, stimer->exp_time);
515 
516 		hrtimer_start(&stimer->timer,
517 			      ktime_add_ns(ktime_now,
518 					   100 * (stimer->exp_time - time_now)),
519 			      HRTIMER_MODE_ABS);
520 		return 0;
521 	}
522 	stimer->exp_time = stimer->count;
523 	if (time_now >= stimer->count) {
524 		/*
525 		 * Expire timer according to Hypervisor Top-Level Functional
526 		 * specification v4(15.3.1):
527 		 * "If a one shot is enabled and the specified count is in
528 		 * the past, it will expire immediately."
529 		 */
530 		stimer_mark_pending(stimer, false);
531 		return 0;
532 	}
533 
534 	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
535 					   stimer->index,
536 					   time_now, stimer->count);
537 
538 	hrtimer_start(&stimer->timer,
539 		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
540 		      HRTIMER_MODE_ABS);
541 	return 0;
542 }
543 
/*
 * Install a new configuration value for @stimer and mark the timer
 * pending, without kicking the vCPU. Always returns 0.
 */
static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	bool was_enabled;

	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	stimer_cleanup(stimer);

	/*
	 * If the timer was enabled and the new value selects SINT 0, the
	 * enable bit is dropped from the new value.
	 */
	was_enabled = stimer->config & HV_STIMER_ENABLE;
	if (was_enabled && !HV_STIMER_SINT(config))
		config &= ~HV_STIMER_ENABLE;

	stimer->config = config;
	stimer_mark_pending(stimer, false);
	return 0;
}
557 
/*
 * Install a new count for @stimer and mark the timer pending, without
 * kicking the vCPU. A zero count clears HV_STIMER_ENABLE; a non-zero
 * count sets it when HV_STIMER_AUTOENABLE is configured. Always
 * returns 0.
 */
static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
			    bool host)
{
	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
				      stimer->index, count, host);

	stimer_cleanup(stimer);
	stimer->count = count;

	if (!count)
		stimer->config &= ~HV_STIMER_ENABLE;
	else if (stimer->config & HV_STIMER_AUTOENABLE)
		stimer->config |= HV_STIMER_ENABLE;

	stimer_mark_pending(stimer, false);
	return 0;
}
573 
/* Store the current configuration of @stimer in @pconfig; returns 0. */
static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	u64 cfg = stimer->config;

	*pconfig = cfg;
	return 0;
}
579 
/* Store the current count of @stimer in @pcount; returns 0. */
static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	u64 cnt = stimer->count;

	*pcount = cnt;
	return 0;
}
585 
/*
 * Copy @src_msg into message slot @sint of the guest message page and
 * raise the SINT.
 *
 * Returns:
 *   0        message stored and the interrupt delivery reported >= 1
 *   -ENOENT  message page not enabled
 *   -EFAULT  message page could not be obtained, or the interrupt
 *            delivery reported 0
 *   -EAGAIN  slot still occupied; its msg_pending flag has been set
 *   other    negative value returned by synic_set_irq()
 */
static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct hv_message_page *mapped;
	struct hv_message *slot;
	struct page *page;
	gpa_t gpa;
	int rc;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	gpa = synic->msg_page & PAGE_MASK;
	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page))
		return -EFAULT;

	mapped = kmap_atomic(page);
	slot = &mapped->sint_message[sint];

	/* Claim the slot only if its type is currently HVMSG_NONE. */
	if (sync_cmpxchg(&slot->header.message_type, HVMSG_NONE,
			 src_msg->header.message_type) == HVMSG_NONE) {
		memcpy(&slot->u.payload, &src_msg->u.payload,
		       src_msg->header.payload_size);
		slot->header.message_type = src_msg->header.message_type;
		slot->header.payload_size = src_msg->header.payload_size;

		rc = synic_set_irq(synic, sint);
		if (rc >= 1)
			rc = 0;
		else if (rc == 0)
			rc = -EFAULT;
	} else {
		/* Slot busy: leave the message, just flag it pending. */
		slot->header.message_flags.msg_pending = 1;
		rc = -EAGAIN;
	}

	kunmap_atomic(mapped);
	kvm_release_page_dirty(page);
	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
	return rc;
}
626 
stimer_send_msg(struct kvm_vcpu_hv_stimer * stimer)627 static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
628 {
629 	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
630 	struct hv_message *msg = &stimer->msg;
631 	struct hv_timer_message_payload *payload =
632 			(struct hv_timer_message_payload *)&msg->u.payload;
633 
634 	payload->expiration_time = stimer->exp_time;
635 	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
636 	return synic_deliver_msg(vcpu_to_synic(vcpu),
637 				 HV_STIMER_SINT(stimer->config), msg);
638 }
639 
stimer_expiration(struct kvm_vcpu_hv_stimer * stimer)640 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
641 {
642 	int r;
643 
644 	stimer->msg_pending = true;
645 	r = stimer_send_msg(stimer);
646 	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
647 				       stimer->index, r);
648 	if (!r) {
649 		stimer->msg_pending = false;
650 		if (!(stimer->config & HV_STIMER_PERIODIC))
651 			stimer->config &= ~HV_STIMER_ENABLE;
652 	}
653 }
654 
kvm_hv_process_stimers(struct kvm_vcpu * vcpu)655 void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
656 {
657 	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
658 	struct kvm_vcpu_hv_stimer *stimer;
659 	u64 time_now, exp_time;
660 	int i;
661 
662 	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
663 		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
664 			stimer = &hv_vcpu->stimer[i];
665 			if (stimer->config & HV_STIMER_ENABLE) {
666 				exp_time = stimer->exp_time;
667 
668 				if (exp_time) {
669 					time_now =
670 						get_time_ref_counter(vcpu->kvm);
671 					if (time_now >= exp_time)
672 						stimer_expiration(stimer);
673 				}
674 
675 				if ((stimer->config & HV_STIMER_ENABLE) &&
676 				    stimer->count) {
677 					if (!stimer->msg_pending)
678 						stimer_start(stimer);
679 				} else
680 					stimer_cleanup(stimer);
681 			}
682 		}
683 }
684 
kvm_hv_vcpu_uninit(struct kvm_vcpu * vcpu)685 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
686 {
687 	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
688 	int i;
689 
690 	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
691 		stimer_cleanup(&hv_vcpu->stimer[i]);
692 }
693 
kvm_hv_assist_page_enabled(struct kvm_vcpu * vcpu)694 bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
695 {
696 	if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
697 		return false;
698 	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
699 }
700 EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
701 
kvm_hv_get_assist_page(struct kvm_vcpu * vcpu,struct hv_vp_assist_page * assist_page)702 bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
703 			    struct hv_vp_assist_page *assist_page)
704 {
705 	if (!kvm_hv_assist_page_enabled(vcpu))
706 		return false;
707 	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
708 				      assist_page, sizeof(*assist_page));
709 }
710 EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
711 
stimer_prepare_msg(struct kvm_vcpu_hv_stimer * stimer)712 static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
713 {
714 	struct hv_message *msg = &stimer->msg;
715 	struct hv_timer_message_payload *payload =
716 			(struct hv_timer_message_payload *)&msg->u.payload;
717 
718 	memset(&msg->header, 0, sizeof(msg->header));
719 	msg->header.message_type = HVMSG_TIMER_EXPIRED;
720 	msg->header.payload_size = sizeof(*payload);
721 
722 	payload->timer_index = stimer->index;
723 	payload->expiration_time = 0;
724 	payload->delivery_time = 0;
725 }
726 
stimer_init(struct kvm_vcpu_hv_stimer * stimer,int timer_index)727 static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
728 {
729 	memset(stimer, 0, sizeof(*stimer));
730 	stimer->index = timer_index;
731 	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
732 	stimer->timer.function = stimer_timer_callback;
733 	stimer_prepare_msg(stimer);
734 }
735 
kvm_hv_vcpu_init(struct kvm_vcpu * vcpu)736 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
737 {
738 	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
739 	int i;
740 
741 	synic_init(&hv_vcpu->synic);
742 
743 	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
744 	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
745 		stimer_init(&hv_vcpu->stimer[i], i);
746 }
747 
kvm_hv_vcpu_postcreate(struct kvm_vcpu * vcpu)748 void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
749 {
750 	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
751 
752 	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
753 }
754 
/*
 * Activate the SynIC of @vcpu and record whether SynIC pages must be
 * left un-zeroed when the guest enables them. Always returns 0.
 */
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	/*
	 * APICV has to go first: Hyper-V SynIC auto EOI SINT's are not
	 * compatible with it.
	 */
	kvm_vcpu_deactivate_apicv(vcpu);

	synic->dont_zero_synic_pages = dont_zero_synic_pages;
	synic->active = true;
	return 0;
}
768 
kvm_hv_msr_partition_wide(u32 msr)769 static bool kvm_hv_msr_partition_wide(u32 msr)
770 {
771 	bool r = false;
772 
773 	switch (msr) {
774 	case HV_X64_MSR_GUEST_OS_ID:
775 	case HV_X64_MSR_HYPERCALL:
776 	case HV_X64_MSR_REFERENCE_TSC:
777 	case HV_X64_MSR_TIME_REF_COUNT:
778 	case HV_X64_MSR_CRASH_CTL:
779 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
780 	case HV_X64_MSR_RESET:
781 	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
782 	case HV_X64_MSR_TSC_EMULATION_CONTROL:
783 	case HV_X64_MSR_TSC_EMULATION_STATUS:
784 		r = true;
785 		break;
786 	}
787 
788 	return r;
789 }
790 
/*
 * Read crash parameter @index into @pdata.
 *
 * Returns -EINVAL (with a one-time warning) if @index is out of range,
 * 0 otherwise.
 */
static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
	size_t nparams = ARRAY_SIZE(hv->hv_crash_param);

	if (WARN_ON_ONCE(index >= nparams))
		return -EINVAL;

	/* Clamp the index under speculation as well. */
	index = array_index_nospec(index, nparams);
	*pdata = hv->hv_crash_param[index];
	return 0;
}
803 
/* Store the VM's crash control value in @pdata; returns 0. */
static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
{
	*pdata = vcpu->kvm->arch.hyperv.hv_crash_ctl;
	return 0;
}
811 
/*
 * Handle a write of @data to the crash control MSR.
 *
 * A host write stores the HV_X64_MSR_CRASH_CTL_NOTIFY bit of @data.  A
 * guest write with that bit set logs the crash parameters and raises
 * KVM_REQ_HV_CRASH. Always returns 0.
 */
static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
	u64 notify = data & HV_X64_MSR_CRASH_CTL_NOTIFY;

	if (host) {
		hv->hv_crash_ctl = notify;
		return 0;
	}

	if (notify) {
		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
			   hv->hv_crash_param[0],
			   hv->hv_crash_param[1],
			   hv->hv_crash_param[2],
			   hv->hv_crash_param[3],
			   hv->hv_crash_param[4]);

		/* Send notification about crash to user space */
		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
	}

	return 0;
}
834 
/*
 * Store @data in crash parameter @index.
 *
 * Returns -EINVAL (with a one-time warning) if @index is out of range,
 * 0 otherwise.
 */
static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 data)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
	size_t nparams = ARRAY_SIZE(hv->hv_crash_param);

	if (WARN_ON_ONCE(index >= nparams))
		return -EINVAL;

	/* Clamp the index under speculation as well. */
	index = array_index_nospec(index, nparams);
	hv->hv_crash_param[index] = data;
	return 0;
}
847 
848 /*
849  * The kvmclock and Hyper-V TSC page use similar formulas, and converting
850  * between them is possible:
851  *
852  * kvmclock formula:
853  *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
854  *           + system_time
855  *
856  * Hyper-V formula:
857  *    nsec/100 = ticks * scale / 2^64 + offset
858  *
859  * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
860  * By dividing the kvmclock formula by 100 and equating what's left we get:
861  *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
862  *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
863  *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
864  *
865  * Now expand the kvmclock formula and divide by 100:
866  *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
867  *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
868  *           + system_time
869  *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
870  *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
871  *               + system_time / 100
872  *
873  * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
874  *    nsec/100 = ticks * scale / 2^64
875  *               - tsc_timestamp * scale / 2^64
876  *               + system_time / 100
877  *
878  * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
879  *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
880  *
881  * These two equivalencies are implemented in this function.
882  */
compute_tsc_page_parameters(struct pvclock_vcpu_time_info * hv_clock,HV_REFERENCE_TSC_PAGE * tsc_ref)883 static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
884 					HV_REFERENCE_TSC_PAGE *tsc_ref)
885 {
886 	u64 max_mul;
887 
888 	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
889 		return false;
890 
891 	/*
892 	 * check if scale would overflow, if so we use the time ref counter
893 	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
894 	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
895 	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
896 	 */
897 	max_mul = 100ull << (32 - hv_clock->tsc_shift);
898 	if (hv_clock->tsc_to_system_mul >= max_mul)
899 		return false;
900 
901 	/*
902 	 * Otherwise compute the scale and offset according to the formulas
903 	 * derived above.
904 	 */
905 	tsc_ref->tsc_scale =
906 		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
907 				hv_clock->tsc_to_system_mul,
908 				100);
909 
910 	tsc_ref->tsc_offset = hv_clock->system_time;
911 	do_div(tsc_ref->tsc_offset, 100);
912 	tsc_ref->tsc_offset -=
913 		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
914 	return true;
915 }
916 
kvm_hv_setup_tsc_page(struct kvm * kvm,struct pvclock_vcpu_time_info * hv_clock)917 void kvm_hv_setup_tsc_page(struct kvm *kvm,
918 			   struct pvclock_vcpu_time_info *hv_clock)
919 {
920 	struct kvm_hv *hv = &kvm->arch.hyperv;
921 	u32 tsc_seq;
922 	u64 gfn;
923 
924 	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
925 	BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
926 
927 	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
928 		return;
929 
930 	mutex_lock(&kvm->arch.hyperv.hv_lock);
931 	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
932 		goto out_unlock;
933 
934 	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
935 	/*
936 	 * Because the TSC parameters only vary when there is a
937 	 * change in the master clock, do not bother with caching.
938 	 */
939 	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
940 				    &tsc_seq, sizeof(tsc_seq))))
941 		goto out_unlock;
942 
943 	/*
944 	 * While we're computing and writing the parameters, force the
945 	 * guest to use the time reference count MSR.
946 	 */
947 	hv->tsc_ref.tsc_sequence = 0;
948 	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
949 			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
950 		goto out_unlock;
951 
952 	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
953 		goto out_unlock;
954 
955 	/* Ensure sequence is zero before writing the rest of the struct.  */
956 	smp_wmb();
957 	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
958 		goto out_unlock;
959 
960 	/*
961 	 * Now switch to the TSC page mechanism by writing the sequence.
962 	 */
963 	tsc_seq++;
964 	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
965 		tsc_seq = 1;
966 
967 	/* Write the struct entirely before the non-zero sequence.  */
968 	smp_wmb();
969 
970 	hv->tsc_ref.tsc_sequence = tsc_seq;
971 	kvm_write_guest(kvm, gfn_to_gpa(gfn),
972 			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
973 out_unlock:
974 	mutex_unlock(&kvm->arch.hyperv.hv_lock);
975 }
976 
/*
 * Handle a write to a partition-wide (per-VM) Hyper-V MSR.
 *
 * @vcpu: vCPU performing the write
 * @msr:  MSR index
 * @data: value being written
 * @host: true when the write is host-initiated rather than coming from
 *        the guest
 *
 * kvm_hv_set_msr_common() calls this with hv_lock held.
 *
 * Returns 0 when the write is accepted and 1 when it is rejected.
 */
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
			     bool host)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		/* Disabling only records the value; no page is touched. */
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		/*
		 * Build the hypercall stub: the vendor-specific hypercall
		 * instruction followed by a near return in the last byte.
		 */
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		instructions[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		/* Latch the MSR value only once the page has been written. */
		hv->hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		/* Enabling the TSC page requests a master clock update. */
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
	case HV_X64_MSR_RESET:
		/* Only the value 1 triggers a reset; other values are ignored. */
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		hv->hv_tsc_emulation_status = data;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}
	return 0;
}
1052 
1053 /* Calculate cpu time spent by current task in 100ns units */
current_task_runtime_100ns(void)1054 static u64 current_task_runtime_100ns(void)
1055 {
1056 	u64 utime, stime;
1057 
1058 	task_cputime_adjusted(current, &utime, &stime);
1059 
1060 	return div_u64(utime + stime, 100);
1061 }
1062 
/*
 * Handle a write to a per-vCPU Hyper-V MSR.
 *
 * @vcpu: vCPU whose MSR is written
 * @msr:  MSR index
 * @data: value being written
 * @host: true when the write is host-initiated rather than coming from
 *        the guest
 *
 * Returns 0 when the write is accepted and non-zero when it is rejected;
 * for MSRs delegated to a helper (APIC, SynIC, synthetic timers) the
 * helper's return value is passed through.
 */
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
		int vcpu_idx = kvm_vcpu_get_idx(vcpu);
		u32 new_vp_index = (u32)data;

		/* Only host-initiated writes with an in-range index are accepted. */
		if (!host || new_vp_index >= KVM_MAX_VCPUS)
			return 1;

		if (new_vp_index == hv_vcpu->vp_index)
			return 0;

		/*
		 * The VP index is initialized to vcpu_index by
		 * kvm_hv_vcpu_postcreate so they initially match.  Now the
		 * VP index is changing, adjust num_mismatched_vp_indexes if
		 * it now matches or no longer matches vcpu_idx.
		 */
		if (hv_vcpu->vp_index == vcpu_idx)
			atomic_inc(&hv->num_mismatched_vp_indexes);
		else if (new_vp_index == vcpu_idx)
			atomic_dec(&hv->num_mismatched_vp_indexes);

		hv_vcpu->vp_index = new_vp_index;
		break;
	}
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		/* Disabling: record the value and turn PV EOI off. */
		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			hv_vcpu->hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		/* The assist page is handed to the guest zero-filled. */
		if (__clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		hv_vcpu->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu,
					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
					    sizeof(struct hv_vp_assist_page)))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;
		/*
		 * Store the difference to the task's current runtime so that
		 * a later read (runtime + offset) continues from @data.
		 */
		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		/* The /2 maps each timer's CONFIG MSR index to its timer number. */
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
					data, host);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}

	return 0;
}
1167 
/*
 * Read a partition-wide (per-VM) Hyper-V MSR.
 *
 * On success the value is stored in *@pdata and 0 is returned.  Unknown
 * MSRs are logged and yield 1 with *@pdata untouched.  Crash MSRs are
 * delegated to their helpers, whose return value is passed through.
 */
static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;
	u64 val;

	switch (msr) {
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
	case HV_X64_MSR_GUEST_OS_ID:
		val = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		val = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		val = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		val = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_RESET:
		/* The reset MSR always reads back as zero. */
		val = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		val = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		val = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		val = hv->hv_tsc_emulation_status;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = val;
	return 0;
}
1213 
/*
 * Read a per-vCPU Hyper-V MSR.
 *
 * Values produced here are stored in *@pdata and 0 is returned.  APIC,
 * SynIC and synthetic-timer MSRs are delegated to their helpers, whose
 * return value is passed through.  Unknown MSRs are logged and yield 1.
 */
static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			  bool host)
{
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
	u64 val;

	switch (msr) {
	/* MSRs backed by the local APIC */
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);

	/* SynIC MSRs */
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);

	/* Synthetic timers */
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int idx = (msr - HV_X64_MSR_STIMER0_CONFIG) / 2;

		return stimer_get_config(vcpu_to_stimer(vcpu, idx), pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int idx = (msr - HV_X64_MSR_STIMER0_COUNT) / 2;

		return stimer_get_count(vcpu_to_stimer(vcpu, idx), pdata);
	}

	/* Values computed or cached right here */
	case HV_X64_MSR_VP_INDEX:
		val = hv_vcpu->vp_index;
		break;
	case HV_X64_MSR_VP_ASSIST_PAGE:
		val = hv_vcpu->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		val = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
		break;
	case HV_X64_MSR_TSC_FREQUENCY:
		/* kHz -> Hz */
		val = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		val = APIC_BUS_FREQUENCY;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = val;
	return 0;
}
1274 
/*
 * Entry point for Hyper-V MSR writes: partition-wide MSRs are handled
 * under hv_lock, everything else goes to the per-vCPU handler.
 */
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_hv *hv;
	int ret;

	if (!kvm_hv_msr_partition_wide(msr))
		return kvm_hv_set_msr(vcpu, msr, data, host);

	hv = &vcpu->kvm->arch.hyperv;

	mutex_lock(&hv->hv_lock);
	ret = kvm_hv_set_msr_pw(vcpu, msr, data, host);
	mutex_unlock(&hv->hv_lock);

	return ret;
}
1287 
/*
 * Entry point for Hyper-V MSR reads: partition-wide MSRs are handled
 * under hv_lock, everything else goes to the per-vCPU handler.
 */
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	struct kvm_hv *hv;
	int ret;

	if (!kvm_hv_msr_partition_wide(msr))
		return kvm_hv_get_msr(vcpu, msr, pdata, host);

	hv = &vcpu->kvm->arch.hyperv;

	mutex_lock(&hv->hv_lock);
	ret = kvm_hv_get_msr_pw(vcpu, msr, pdata);
	mutex_unlock(&hv->hv_lock);

	return ret;
}
1300 
/*
 * Translate a bank number into its position within a sparse bank array.
 *
 * @valid_bank_mask: bit N set means bank N is present in the sparse set
 * @bank_no:         bank to look up; must be in the range 0..63
 *
 * Returns -1 if @bank_no is not present in @valid_bank_mask, otherwise
 * the number of present banks below @bank_no, i.e. its index in the
 * packed array.
 */
static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
{
	u64 bank_bit = BIT_ULL(bank_no);

	if (!(valid_bank_mask & bank_bit))
		return -1;

	/* Population count of the valid bits strictly below @bank_no. */
	return hweight64(valid_bank_mask & (bank_bit - 1));
}
1314 
/*
 * Handle the Hyper-V TLB flush hypercalls.
 *
 * @current_vcpu: vCPU that issued the hypercall
 * @ingpa:        guest physical address of the input structure
 *                (struct hv_tlb_flush, or struct hv_tlb_flush_ex if @ex)
 * @rep_cnt:      repetition count from the hypercall input; echoed back
 *                as the completed count on success
 * @ex:           true for the extended (sparse VP set) variants
 *
 * The target vCPUs are collected into a bitmap and a TLB flush request
 * is raised on them.  The flush is always a full one, regardless of the
 * address space or address list supplied by the guest.
 *
 * Returns HV_STATUS_INVALID_HYPERCALL_INPUT if the input cannot be read
 * from guest memory, otherwise HV_STATUS_SUCCESS combined with @rep_cnt
 * shifted to HV_HYPERCALL_REP_COMP_OFFSET.
 */
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
			    u16 rep_cnt, bool ex)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	struct kvm_vcpu *vcpu;
	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
	/*
	 * Must be a genuine 64-bit quantity: the guest supplies a 64-bit
	 * mask, and an 'unsigned long' would truncate it on 32-bit builds.
	 */
	u64 valid_bank_mask = 0;
	u64 sparse_banks[64];
	int sparse_banks_len = 0, i;
	bool all_cpus;

	if (!ex) {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		/* The plain mask is treated as the one and only bank (bank 0). */
		sparse_banks[0] = flush.processor_mask;

		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
					    sizeof(flush_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		/* Any format other than the sparse 4K set targets every vCPU. */
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		/* One u64 of bank contents follows for each valid bank. */
		sparse_banks_len = hweight64(valid_bank_mask) *
			sizeof(sparse_banks[0]);

		/* An empty sparse set selects nobody: nothing to flush. */
		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_tlb_flush_ex,
						    hv_vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	cpumask_clear(&hv_current->tlb_lush);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
		int bank = hv->vp_index / 64, sbank = 0;

		if (!all_cpus) {
			/* Banks >64 can't be represented */
			if (bank >= 64)
				continue;

			/* Non-ex hypercalls can only address first 64 vCPUs */
			if (!ex && bank)
				continue;

			if (ex) {
				/*
				 * Check if the bank of this vCPU is in sparse
				 * set and get the sparse bank number.
				 */
				sbank = get_sparse_bank_no(valid_bank_mask,
							   bank);

				if (sbank < 0)
					continue;
			}

			if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
				continue;
		}

		/*
		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
		 * can't analyze it here, flush TLB regardless of the specified
		 * address space.
		 */
		__set_bit(i, vcpu_bitmap);
	}

	kvm_make_vcpus_request_mask(kvm,
				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
				    vcpu_bitmap, &hv_current->tlb_lush);

ret_success:
	/* We always do full TLB flush, set rep_done = rep_cnt. */
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}
1424 
kvm_hv_hypercall_enabled(struct kvm * kvm)1425 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1426 {
1427 	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
1428 }
1429 
/*
 * Store a hypercall result in the guest registers: RAX alone in 64-bit
 * mode, otherwise split across RDX (high half) and RAX (low half).
 */
static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	if (is_64_bit_mode(vcpu)) {
		kvm_register_write(vcpu, VCPU_REGS_RAX, result);
		return;
	}

	kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
	kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
}
1442 
/*
 * Finish a hypercall: hand @result to the guest, account the call and
 * step over the hypercall instruction.  Returns whatever
 * kvm_skip_emulated_instruction() returns.
 */
static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	vcpu->stat.hypercalls++;

	return kvm_skip_emulated_instruction(vcpu);
}
1449 
kvm_hv_hypercall_complete_userspace(struct kvm_vcpu * vcpu)1450 static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
1451 {
1452 	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
1453 }
1454 
kvm_hvcall_signal_event(struct kvm_vcpu * vcpu,bool fast,u64 param)1455 static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
1456 {
1457 	struct eventfd_ctx *eventfd;
1458 
1459 	if (unlikely(!fast)) {
1460 		int ret;
1461 		gpa_t gpa = param;
1462 
1463 		if ((gpa & (__alignof__(param) - 1)) ||
1464 		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
1465 			return HV_STATUS_INVALID_ALIGNMENT;
1466 
1467 		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
1468 		if (ret < 0)
1469 			return HV_STATUS_INVALID_ALIGNMENT;
1470 	}
1471 
1472 	/*
1473 	 * Per spec, bits 32-47 contain the extra "flag number".  However, we
1474 	 * have no use for it, and in all known usecases it is zero, so just
1475 	 * report lookup failure if it isn't.
1476 	 */
1477 	if (param & 0xffff00000000ULL)
1478 		return HV_STATUS_INVALID_PORT_ID;
1479 	/* remaining bits are reserved-zero */
1480 	if (param & ~KVM_HYPERV_CONN_ID_MASK)
1481 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
1482 
1483 	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
1484 	rcu_read_lock();
1485 	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
1486 	rcu_read_unlock();
1487 	if (!eventfd)
1488 		return HV_STATUS_INVALID_PORT_ID;
1489 
1490 	eventfd_signal(eventfd, 1);
1491 	return HV_STATUS_SUCCESS;
1492 }
1493 
kvm_hv_hypercall(struct kvm_vcpu * vcpu)1494 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
1495 {
1496 	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
1497 	uint16_t code, rep_idx, rep_cnt;
1498 	bool fast, longmode, rep;
1499 
1500 	/*
1501 	 * hypercall generates UD from non zero cpl and real mode
1502 	 * per HYPER-V spec
1503 	 */
1504 	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
1505 		kvm_queue_exception(vcpu, UD_VECTOR);
1506 		return 1;
1507 	}
1508 
1509 	longmode = is_64_bit_mode(vcpu);
1510 
1511 	if (!longmode) {
1512 		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
1513 			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
1514 		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
1515 			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
1516 		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
1517 			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
1518 	}
1519 #ifdef CONFIG_X86_64
1520 	else {
1521 		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
1522 		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
1523 		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
1524 	}
1525 #endif
1526 
1527 	code = param & 0xffff;
1528 	fast = !!(param & HV_HYPERCALL_FAST_BIT);
1529 	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
1530 	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
1531 	rep = !!(rep_cnt || rep_idx);
1532 
1533 	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
1534 
1535 	switch (code) {
1536 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
1537 		if (unlikely(rep)) {
1538 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1539 			break;
1540 		}
1541 		kvm_vcpu_on_spin(vcpu, true);
1542 		break;
1543 	case HVCALL_SIGNAL_EVENT:
1544 		if (unlikely(rep)) {
1545 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1546 			break;
1547 		}
1548 		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
1549 		if (ret != HV_STATUS_INVALID_PORT_ID)
1550 			break;
1551 		/* maybe userspace knows this conn_id: fall through */
1552 	case HVCALL_POST_MESSAGE:
1553 		/* don't bother userspace if it has no way to handle it */
1554 		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
1555 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1556 			break;
1557 		}
1558 		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
1559 		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
1560 		vcpu->run->hyperv.u.hcall.input = param;
1561 		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
1562 		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
1563 		vcpu->arch.complete_userspace_io =
1564 				kvm_hv_hypercall_complete_userspace;
1565 		return 0;
1566 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
1567 		if (unlikely(fast || !rep_cnt || rep_idx)) {
1568 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1569 			break;
1570 		}
1571 		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
1572 		break;
1573 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
1574 		if (unlikely(fast || rep)) {
1575 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1576 			break;
1577 		}
1578 		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
1579 		break;
1580 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
1581 		if (unlikely(fast || !rep_cnt || rep_idx)) {
1582 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1583 			break;
1584 		}
1585 		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
1586 		break;
1587 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
1588 		if (unlikely(fast || rep)) {
1589 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1590 			break;
1591 		}
1592 		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
1593 		break;
1594 	default:
1595 		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
1596 		break;
1597 	}
1598 
1599 	return kvm_hv_hypercall_complete(vcpu, ret);
1600 }
1601 
kvm_hv_init_vm(struct kvm * kvm)1602 void kvm_hv_init_vm(struct kvm *kvm)
1603 {
1604 	mutex_init(&kvm->arch.hyperv.hv_lock);
1605 	idr_init(&kvm->arch.hyperv.conn_to_evt);
1606 }
1607 
kvm_hv_destroy_vm(struct kvm * kvm)1608 void kvm_hv_destroy_vm(struct kvm *kvm)
1609 {
1610 	struct eventfd_ctx *eventfd;
1611 	int i;
1612 
1613 	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
1614 		eventfd_ctx_put(eventfd);
1615 	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
1616 }
1617 
/*
 * Bind the eventfd behind @fd to connection id @conn_id.
 *
 * Returns 0 on success, -EEXIST if @conn_id is already taken, or
 * another negative errno from the fd lookup or the idr allocation.
 */
static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *evt;
	int id;

	evt = eventfd_ctx_fdget(fd);
	if (IS_ERR(evt))
		return PTR_ERR(evt);

	/* Request exactly the slot [conn_id, conn_id + 1). */
	mutex_lock(&hv->hv_lock);
	id = idr_alloc(&hv->conn_to_evt, evt, conn_id, conn_id + 1,
		       GFP_KERNEL);
	mutex_unlock(&hv->hv_lock);

	if (id < 0) {
		/* No room in a one-slot range means the id is in use. */
		if (id == -ENOSPC)
			id = -EEXIST;
		eventfd_ctx_put(evt);
		return id;
	}

	return 0;
}
1641 
/*
 * Remove the eventfd bound to connection id @conn_id.
 *
 * Returns 0 on success or -ENOENT if nothing is registered for @conn_id.
 */
static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *evt;

	mutex_lock(&hv->hv_lock);
	evt = idr_remove(&hv->conn_to_evt, conn_id);
	mutex_unlock(&hv->hv_lock);

	if (evt == NULL)
		return -ENOENT;

	/* Wait out an SRCU grace period before dropping the reference. */
	synchronize_srcu(&kvm->srcu);
	eventfd_ctx_put(evt);

	return 0;
}
1658 
kvm_vm_ioctl_hv_eventfd(struct kvm * kvm,struct kvm_hyperv_eventfd * args)1659 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
1660 {
1661 	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
1662 	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
1663 		return -EINVAL;
1664 
1665 	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
1666 		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
1667 	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
1668 }
1669