1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 /*
4  * Local APIC virtualization
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright (C) 2007 Novell
8  * Copyright (C) 2007 Intel
9  * Copyright 2009 Red Hat, Inc. and/or its affiliates.
10  *
11  * Authors:
12  *   Dor Laor <dor.laor@qumranet.com>
13  *   Gregory Haskins <ghaskins@novell.com>
14  *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
15  *
16  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17  */
18 
19 #include <linux/kvm_host.h>
20 #include <linux/kvm.h>
21 #include <linux/mm.h>
22 #include <linux/highmem.h>
23 #include <linux/smp.h>
24 #include <linux/hrtimer.h>
25 #include <linux/io.h>
26 #include <linux/export.h>
27 #include <linux/math64.h>
28 #include <linux/slab.h>
29 #include <asm/processor.h>
30 #include <asm/msr.h>
31 #include <asm/page.h>
32 #include <asm/current.h>
33 #include <asm/apicdef.h>
34 #include <asm/delay.h>
35 #include <linux/atomic.h>
36 #include <linux/jump_label.h>
37 #include "kvm_cache_regs.h"
38 #include "irq.h"
39 #include "ioapic.h"
40 #include "trace.h"
41 #include "x86.h"
42 #include "cpuid.h"
43 #include "hyperv.h"
44 
45 #ifndef CONFIG_X86_64
46 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
47 #else
48 #define mod_64(x, y) ((x) % (y))
49 #endif
50 
51 #define PRId64 "d"
52 #define PRIx64 "llx"
53 #define PRIu64 "u"
54 #define PRIo64 "o"
55 
56 /* 14 is the version for Xeon and Pentium 8.4.8 */
57 #define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
58 #define LAPIC_MMIO_LENGTH		(1 << 12)
59 /* The following defines are not in apicdef.h */
60 #define MAX_APIC_VECTOR			256
61 #define APIC_VECTORS_PER_REG		32
62 
63 static bool lapic_timer_advance_dynamic __read_mostly;
64 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
65 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
66 #define LAPIC_TIMER_ADVANCE_NS_INIT	1000
67 #define LAPIC_TIMER_ADVANCE_NS_MAX     5000
68 /* step-by-step approximation to mitigate fluctuation */
69 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
70 
71 static inline int apic_test_vector(int vec, void *bitmap)
72 {
73 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
74 }
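/*
 * Illustrative sketch (not part of this file): how a vector indexes the
 * 256-bit IRR/ISR/TMR sets, which are laid out as eight 32-bit registers
 * spaced 0x10 bytes apart.  VEC_POS()/REG_POS() live in lapic.h; the
 * demo_* helpers below are local stand-ins for demonstration only.
 */
static unsigned int demo_reg_pos(unsigned int vec)
{
	return (vec >> 5) << 4;		/* byte offset of the 32-bit register */
}

static unsigned int demo_vec_pos(unsigned int vec)
{
	return vec & 31;		/* bit position inside that register */
}

/* e.g. vector 0x31: demo_reg_pos() == 0x10 (second register), demo_vec_pos() == 17 */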
75 
76 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
77 {
78 	struct kvm_lapic *apic = vcpu->arch.apic;
79 
80 	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
81 		apic_test_vector(vector, apic->regs + APIC_IRR);
82 }
83 
84 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
85 {
86 	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
87 }
88 
89 static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
90 {
91 	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
92 }
93 
94 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
95 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
96 
97 static inline int apic_enabled(struct kvm_lapic *apic)
98 {
99 	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
100 }
101 
102 #define LVT_MASK	\
103 	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
104 
105 #define LINT_MASK	\
106 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
107 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
108 
109 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
110 {
111 	return apic->vcpu->vcpu_id;
112 }
113 
114 static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
115 {
116 	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
117 		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
118 }
119 
120 bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
121 {
122 	return kvm_x86_ops.set_hv_timer
123 	       && !(kvm_mwait_in_guest(vcpu->kvm) ||
124 		    kvm_can_post_timer_interrupt(vcpu));
125 }
126 EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
127 
128 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
129 {
130 	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
131 }
132 
133 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
134 		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
135 	switch (map->mode) {
136 	case KVM_APIC_MODE_X2APIC: {
137 		u32 offset = (dest_id >> 16) * 16;
138 		u32 max_apic_id = map->max_apic_id;
139 
140 		if (offset <= max_apic_id) {
141 			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
142 
143 			offset = array_index_nospec(offset, map->max_apic_id + 1);
144 			*cluster = &map->phys_map[offset];
145 			*mask = dest_id & (0xffff >> (16 - cluster_size));
146 		} else {
147 			*mask = 0;
148 		}
149 
150 		return true;
151 		}
152 	case KVM_APIC_MODE_XAPIC_FLAT:
153 		*cluster = map->xapic_flat_map;
154 		*mask = dest_id & 0xff;
155 		return true;
156 	case KVM_APIC_MODE_XAPIC_CLUSTER:
157 		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
158 		*mask = dest_id & 0xf;
159 		return true;
160 	default:
161 		/* Not optimized. */
162 		return false;
163 	}
164 }
165 
166 static void kvm_apic_map_free(struct rcu_head *rcu)
167 {
168 	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
169 
170 	kvfree(map);
171 }
172 
173 /*
174  * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
175  *
176  * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
177  * apic_map_lock_held.
178  */
179 enum {
180 	CLEAN,
181 	UPDATE_IN_PROGRESS,
182 	DIRTY
183 };
184 
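/*
 * Illustrative sketch (user-space C11, not kernel code): the rough shape of
 * the dirty-tracking protocol above.  Writers mark the map DIRTY without a
 * lock; the recalculation path, holding the map mutex, moves DIRTY ->
 * UPDATE_IN_PROGRESS -> CLEAN, and a racing writer simply leaves the state
 * DIRTY again.  All demo_* names are local stand-ins.
 */
#include <stdatomic.h>

enum demo_state { DEMO_CLEAN, DEMO_UPDATE_IN_PROGRESS, DEMO_DIRTY };
static _Atomic int demo_map_state = DEMO_CLEAN;

static void demo_mark_dirty(void)			/* lockless side */
{
	atomic_store_explicit(&demo_map_state, DEMO_DIRTY, memory_order_release);
}

static void demo_recalculate(void)			/* under the map mutex */
{
	int expected = DEMO_DIRTY;

	if (!atomic_compare_exchange_strong(&demo_map_state, &expected,
					    DEMO_UPDATE_IN_PROGRESS))
		return;		/* nothing to do, or someone else got here first */

	/* ... rebuild the map here ... */

	expected = DEMO_UPDATE_IN_PROGRESS;
	atomic_compare_exchange_strong(&demo_map_state, &expected, DEMO_CLEAN);
}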
185 void kvm_recalculate_apic_map(struct kvm *kvm)
186 {
187 	struct kvm_apic_map *new, *old = NULL;
188 	struct kvm_vcpu *vcpu;
189 	int i;
190 	u32 max_id = 255; /* enough space for any xAPIC ID */
191 
192 	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
193 	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
194 		return;
195 
196 	WARN_ONCE(!irqchip_in_kernel(kvm),
197 		  "Dirty APIC map without an in-kernel local APIC");
198 
199 	mutex_lock(&kvm->arch.apic_map_lock);
200 	/*
201 	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
202 	 * (if clean) or the APIC registers (if dirty).
203 	 */
204 	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
205 				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
206 		/* Someone else has updated the map. */
207 		mutex_unlock(&kvm->arch.apic_map_lock);
208 		return;
209 	}
210 
211 	kvm_for_each_vcpu(i, vcpu, kvm)
212 		if (kvm_apic_present(vcpu))
213 			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
214 
215 	new = kvzalloc(sizeof(struct kvm_apic_map) +
216 	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
217 			   GFP_KERNEL_ACCOUNT);
218 
219 	if (!new)
220 		goto out;
221 
222 	new->max_apic_id = max_id;
223 
224 	kvm_for_each_vcpu(i, vcpu, kvm) {
225 		struct kvm_lapic *apic = vcpu->arch.apic;
226 		struct kvm_lapic **cluster;
227 		u16 mask;
228 		u32 ldr;
229 		u8 xapic_id;
230 		u32 x2apic_id;
231 
232 		if (!kvm_apic_present(vcpu))
233 			continue;
234 
235 		xapic_id = kvm_xapic_id(apic);
236 		x2apic_id = kvm_x2apic_id(apic);
237 
238 		/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
239 		if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
240 				x2apic_id <= new->max_apic_id)
241 			new->phys_map[x2apic_id] = apic;
242 		/*
243 		 * ... the xAPIC ID of VCPUs with an APIC ID > 0xff will wrap around;
244 		 * prevent them from masking VCPUs with an APIC ID <= 0xff.
245 		 */
246 		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
247 			new->phys_map[xapic_id] = apic;
248 
249 		if (!kvm_apic_sw_enabled(apic))
250 			continue;
251 
252 		ldr = kvm_lapic_get_reg(apic, APIC_LDR);
253 
254 		if (apic_x2apic_mode(apic)) {
255 			new->mode |= KVM_APIC_MODE_X2APIC;
256 		} else if (ldr) {
257 			ldr = GET_APIC_LOGICAL_ID(ldr);
258 			if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
259 				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
260 			else
261 				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
262 		}
263 
264 		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
265 			continue;
266 
267 		if (mask)
268 			cluster[ffs(mask) - 1] = apic;
269 	}
270 out:
271 	old = rcu_dereference_protected(kvm->arch.apic_map,
272 			lockdep_is_held(&kvm->arch.apic_map_lock));
273 	rcu_assign_pointer(kvm->arch.apic_map, new);
274 	/*
275 	 * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
276 	 * If another update has come in, leave it DIRTY.
277 	 */
278 	atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
279 			       UPDATE_IN_PROGRESS, CLEAN);
280 	mutex_unlock(&kvm->arch.apic_map_lock);
281 
282 	if (old)
283 		call_rcu(&old->rcu, kvm_apic_map_free);
284 
285 	kvm_make_scan_ioapic_request(kvm);
286 }
287 
288 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
289 {
290 	bool enabled = val & APIC_SPIV_APIC_ENABLED;
291 
292 	kvm_lapic_set_reg(apic, APIC_SPIV, val);
293 
294 	if (enabled != apic->sw_enabled) {
295 		apic->sw_enabled = enabled;
296 		if (enabled)
297 			static_branch_slow_dec_deferred(&apic_sw_disabled);
298 		else
299 			static_branch_inc(&apic_sw_disabled.key);
300 
301 		atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
302 	}
303 
304 	/* Check if there are APF page ready requests pending */
305 	if (enabled)
306 		kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
307 }
308 
309 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
310 {
311 	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
312 	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
313 }
314 
315 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
316 {
317 	kvm_lapic_set_reg(apic, APIC_LDR, id);
318 	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
319 }
320 
321 static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
322 {
323 	kvm_lapic_set_reg(apic, APIC_DFR, val);
324 	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
325 }
326 
327 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
328 {
329 	return ((id >> 4) << 16) | (1 << (id & 0xf));
330 }
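/*
 * Worked example (illustrative, not kernel code): in x2APIC mode the logical
 * ID is derived from the APIC ID as a cluster number in bits 31:16 and a
 * one-hot bit in bits 15:0.  APIC ID 0x25 -> cluster 2, in-cluster bit 5,
 * i.e. LDR 0x00020020, matching kvm_apic_calc_x2apic_ldr(0x25).
 */
static unsigned int demo_x2apic_ldr(unsigned int id)
{
	return ((id >> 4) << 16) | (1u << (id & 0xf));	/* 0x25 -> 0x00020020 */
}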
331 
332 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
333 {
334 	u32 ldr = kvm_apic_calc_x2apic_ldr(id);
335 
336 	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
337 
338 	kvm_lapic_set_reg(apic, APIC_ID, id);
339 	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
340 	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
341 }
342 
343 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
344 {
345 	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
346 }
347 
348 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
349 {
350 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
351 }
352 
353 static inline int apic_lvtt_period(struct kvm_lapic *apic)
354 {
355 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
356 }
357 
358 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
359 {
360 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
361 }
362 
363 static inline int apic_lvt_nmi_mode(u32 lvt_val)
364 {
365 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
366 }
367 
368 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
369 {
370 	struct kvm_lapic *apic = vcpu->arch.apic;
371 	u32 v = APIC_VERSION;
372 
373 	if (!lapic_in_kernel(vcpu))
374 		return;
375 
376 	/*
377 	 * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has no
378 	 * EOI register.  Some buggy OSes (e.g. Windows with the Hyper-V role)
379 	 * disable EOI broadcast in the LAPIC without first checking the IOAPIC
380 	 * version, so level-triggered interrupts would never get EOIed in the
381 	 * IOAPIC.
382 	 */
383 	if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
384 	    !ioapic_in_kernel(vcpu->kvm))
385 		v |= APIC_LVR_DIRECTED_EOI;
386 	kvm_lapic_set_reg(apic, APIC_LVR, v);
387 }
388 
389 static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
390 	LVT_MASK,	/* partial LVTT mask, timer mode mask added at runtime */
391 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
392 	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
393 	LINT_MASK, LINT_MASK,	/* LVT0-1 */
394 	LVT_MASK		/* LVTERR */
395 };
396 
397 static int find_highest_vector(void *bitmap)
398 {
399 	int vec;
400 	u32 *reg;
401 
402 	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
403 	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
404 		reg = bitmap + REG_POS(vec);
405 		if (*reg)
406 			return __fls(*reg) + vec;
407 	}
408 
409 	return -1;
410 }
411 
412 static u8 count_vectors(void *bitmap)
413 {
414 	int vec;
415 	u32 *reg;
416 	u8 count = 0;
417 
418 	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
419 		reg = bitmap + REG_POS(vec);
420 		count += hweight32(*reg);
421 	}
422 
423 	return count;
424 }
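/*
 * Illustrative sketch (user-space, not kernel code): the scan performed by
 * find_highest_vector(), walking the eight 32-bit registers from the top
 * down and returning the highest set bit of the 256-bit set, or -1 if it is
 * empty.  demo_fls32() is a portable stand-in for the kernel's __fls().
 */
static int demo_fls32(unsigned int x)
{
	int i;

	for (i = 31; i >= 0; i--)
		if (x & (1u << i))
			return i;
	return -1;
}

static int demo_find_highest_vector(const unsigned int reg[8])
{
	int word;

	for (word = 7; word >= 0; word--)
		if (reg[word])
			return demo_fls32(reg[word]) + word * 32;
	return -1;
}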
425 
426 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
427 {
428 	u32 i, vec;
429 	u32 pir_val, irr_val, prev_irr_val;
430 	int max_updated_irr;
431 
432 	max_updated_irr = -1;
433 	*max_irr = -1;
434 
435 	for (i = vec = 0; i <= 7; i++, vec += 32) {
436 		pir_val = READ_ONCE(pir[i]);
437 		irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
438 		if (pir_val) {
439 			prev_irr_val = irr_val;
440 			irr_val |= xchg(&pir[i], 0);
441 			*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
442 			if (prev_irr_val != irr_val) {
443 				max_updated_irr =
444 					__fls(irr_val ^ prev_irr_val) + vec;
445 			}
446 		}
447 		if (irr_val)
448 			*max_irr = __fls(irr_val) + vec;
449 	}
450 
451 	return ((max_updated_irr != -1) &&
452 		(max_updated_irr == *max_irr));
453 }
454 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
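/*
 * Illustrative sketch (user-space C11, not kernel code): the central
 * hand-off in __kvm_apic_update_irr() — each posted-interrupt word is
 * atomically swapped to zero so every pending bit is consumed exactly once,
 * then OR-ed into the software IRR word.  demo_* names are local stand-ins.
 */
#include <stdatomic.h>

static unsigned int demo_consume_pir_word(_Atomic unsigned int *pir_word,
					  unsigned int *irr_word)
{
	unsigned int pending = atomic_exchange(pir_word, 0);

	*irr_word |= pending;
	return pending;		/* non-zero if anything new arrived */
}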
455 
456 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
457 {
458 	struct kvm_lapic *apic = vcpu->arch.apic;
459 
460 	return __kvm_apic_update_irr(pir, apic->regs, max_irr);
461 }
462 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
463 
464 static inline int apic_search_irr(struct kvm_lapic *apic)
465 {
466 	return find_highest_vector(apic->regs + APIC_IRR);
467 }
468 
469 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
470 {
471 	int result;
472 
473 	/*
474 	 * Note that irr_pending is just a hint. It will always be
475 	 * true when virtual interrupt delivery is enabled.
476 	 */
477 	if (!apic->irr_pending)
478 		return -1;
479 
480 	result = apic_search_irr(apic);
481 	ASSERT(result == -1 || result >= 16);
482 
483 	return result;
484 }
485 
486 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
487 {
488 	struct kvm_vcpu *vcpu;
489 
490 	vcpu = apic->vcpu;
491 
492 	if (unlikely(vcpu->arch.apicv_active)) {
493 		/* need to update RVI */
494 		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
495 		static_call(kvm_x86_hwapic_irr_update)(vcpu,
496 				apic_find_highest_irr(apic));
497 	} else {
498 		apic->irr_pending = false;
499 		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
500 		if (apic_search_irr(apic) != -1)
501 			apic->irr_pending = true;
502 	}
503 }
504 
505 void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
506 {
507 	apic_clear_irr(vec, vcpu->arch.apic);
508 }
509 EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
510 
511 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
512 {
513 	struct kvm_vcpu *vcpu;
514 
515 	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
516 		return;
517 
518 	vcpu = apic->vcpu;
519 
520 	/*
521 	 * With APIC virtualization enabled, all caching is disabled
522 	 * because the processor can modify ISR under the hood.  Instead
523 	 * just set SVI.
524 	 */
525 	if (unlikely(vcpu->arch.apicv_active))
526 		static_call(kvm_x86_hwapic_isr_update)(vcpu, vec);
527 	else {
528 		++apic->isr_count;
529 		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
530 		/*
531 		 * The ISR (in-service register) bit is set when an interrupt is
532 		 * injected, and only the highest-priority vector is injected, so
533 		 * the most recently set bit matches the highest bit in the ISR.
534 		 */
535 		apic->highest_isr_cache = vec;
536 	}
537 }
538 
539 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
540 {
541 	int result;
542 
543 	/*
544 	 * Note that isr_count is always 1, and highest_isr_cache
545 	 * is always -1, with APIC virtualization enabled.
546 	 */
547 	if (!apic->isr_count)
548 		return -1;
549 	if (likely(apic->highest_isr_cache != -1))
550 		return apic->highest_isr_cache;
551 
552 	result = find_highest_vector(apic->regs + APIC_ISR);
553 	ASSERT(result == -1 || result >= 16);
554 
555 	return result;
556 }
557 
558 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
559 {
560 	struct kvm_vcpu *vcpu;
561 	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
562 		return;
563 
564 	vcpu = apic->vcpu;
565 
566 	/*
567 	 * We do get here for APIC virtualization enabled if the guest
568 	 * uses the Hyper-V APIC enlightenment.  In this case we may need
569 	 * to trigger a new interrupt delivery by writing the SVI field;
570 	 * on the other hand isr_count and highest_isr_cache are unused
571 	 * and must be left alone.
572 	 */
573 	if (unlikely(vcpu->arch.apicv_active))
574 		static_call(kvm_x86_hwapic_isr_update)(vcpu,
575 						apic_find_highest_isr(apic));
576 	else {
577 		--apic->isr_count;
578 		BUG_ON(apic->isr_count < 0);
579 		apic->highest_isr_cache = -1;
580 	}
581 }
582 
583 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
584 {
585 	/* This may race with setting of irr in __apic_accept_irq() and the
586 	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq()
587 	 * causes an immediate vmexit and the value is recalculated on the
588 	 * next vmentry.
589 	 */
590 	return apic_find_highest_irr(vcpu->arch.apic);
591 }
592 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
593 
594 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
595 			     int vector, int level, int trig_mode,
596 			     struct dest_map *dest_map);
597 
598 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
599 		     struct dest_map *dest_map)
600 {
601 	struct kvm_lapic *apic = vcpu->arch.apic;
602 
603 	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
604 			irq->level, irq->trig_mode, dest_map);
605 }
606 
607 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
608 			 struct kvm_lapic_irq *irq, u32 min)
609 {
610 	int i, count = 0;
611 	struct kvm_vcpu *vcpu;
612 
613 	if (min > map->max_apic_id)
614 		return 0;
615 
616 	for_each_set_bit(i, ipi_bitmap,
617 		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
618 		if (map->phys_map[min + i]) {
619 			vcpu = map->phys_map[min + i]->vcpu;
620 			count += kvm_apic_set_irq(vcpu, irq, NULL);
621 		}
622 	}
623 
624 	return count;
625 }
626 
627 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
628 		    unsigned long ipi_bitmap_high, u32 min,
629 		    unsigned long icr, int op_64_bit)
630 {
631 	struct kvm_apic_map *map;
632 	struct kvm_lapic_irq irq = {0};
633 	int cluster_size = op_64_bit ? 64 : 32;
634 	int count;
635 
636 	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
637 		return -KVM_EINVAL;
638 
639 	irq.vector = icr & APIC_VECTOR_MASK;
640 	irq.delivery_mode = icr & APIC_MODE_MASK;
641 	irq.level = (icr & APIC_INT_ASSERT) != 0;
642 	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
643 
644 	rcu_read_lock();
645 	map = rcu_dereference(kvm->arch.apic_map);
646 
647 	count = -EOPNOTSUPP;
648 	if (likely(map)) {
649 		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
650 		min += cluster_size;
651 		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
652 	}
653 
654 	rcu_read_unlock();
655 	return count;
656 }
657 
658 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
659 {
660 
661 	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
662 				      sizeof(val));
663 }
664 
665 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
666 {
667 
668 	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
669 				      sizeof(*val));
670 }
671 
672 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
673 {
674 	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
675 }
676 
677 static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
678 {
679 	u8 val;
680 	if (pv_eoi_get_user(vcpu, &val) < 0)
681 		return false;
682 
683 	return val & KVM_PV_EOI_ENABLED;
684 }
685 
686 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
687 {
688 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
689 		return;
690 
691 	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
692 }
693 
694 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
695 {
696 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
697 		return;
698 
699 	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
700 }
701 
702 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
703 {
704 	int highest_irr;
705 	if (kvm_x86_ops.sync_pir_to_irr)
706 		highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
707 	else
708 		highest_irr = apic_find_highest_irr(apic);
709 	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
710 		return -1;
711 	return highest_irr;
712 }
713 
714 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
715 {
716 	u32 tpr, isrv, ppr, old_ppr;
717 	int isr;
718 
719 	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
720 	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
721 	isr = apic_find_highest_isr(apic);
722 	isrv = (isr != -1) ? isr : 0;
723 
724 	if ((tpr & 0xf0) >= (isrv & 0xf0))
725 		ppr = tpr & 0xff;
726 	else
727 		ppr = isrv & 0xf0;
728 
729 	*new_ppr = ppr;
730 	if (old_ppr != ppr)
731 		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
732 
733 	return ppr < old_ppr;
734 }
735 
736 static void apic_update_ppr(struct kvm_lapic *apic)
737 {
738 	u32 ppr;
739 
740 	if (__apic_update_ppr(apic, &ppr) &&
741 	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
742 		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
743 }
744 
745 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
746 {
747 	apic_update_ppr(vcpu->arch.apic);
748 }
749 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
750 
751 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
752 {
753 	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
754 	apic_update_ppr(apic);
755 }
756 
757 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
758 {
759 	return mda == (apic_x2apic_mode(apic) ?
760 			X2APIC_BROADCAST : APIC_BROADCAST);
761 }
762 
763 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
764 {
765 	if (kvm_apic_broadcast(apic, mda))
766 		return true;
767 
768 	if (apic_x2apic_mode(apic))
769 		return mda == kvm_x2apic_id(apic);
770 
771 	/*
772 	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
773 	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
774 	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
775 	 * The 0xff condition is needed because the xAPIC ID is writeable.
776 	 */
777 	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
778 		return true;
779 
780 	return mda == kvm_xapic_id(apic);
781 }
782 
783 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
784 {
785 	u32 logical_id;
786 
787 	if (kvm_apic_broadcast(apic, mda))
788 		return true;
789 
790 	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
791 
792 	if (apic_x2apic_mode(apic))
793 		return ((logical_id >> 16) == (mda >> 16))
794 		       && (logical_id & mda & 0xffff) != 0;
795 
796 	logical_id = GET_APIC_LOGICAL_ID(logical_id);
797 
798 	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
799 	case APIC_DFR_FLAT:
800 		return (logical_id & mda) != 0;
801 	case APIC_DFR_CLUSTER:
802 		return ((logical_id >> 4) == (mda >> 4))
803 		       && (logical_id & mda & 0xf) != 0;
804 	default:
805 		return false;
806 	}
807 }
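/*
 * Worked example (illustrative, not kernel code): xAPIC logical matching as
 * implemented above.  In flat mode the LDR and MDA only need a common bit;
 * in cluster mode the upper nibbles must be equal and the lower nibbles must
 * intersect.  E.g. LDR 0x22 matches MDA 0x23 in cluster mode (cluster 2,
 * common bit 1) but not MDA 0x13.
 */
static int demo_match_flat(unsigned char ldr, unsigned char mda)
{
	return (ldr & mda) != 0;
}

static int demo_match_cluster(unsigned char ldr, unsigned char mda)
{
	return (ldr >> 4) == (mda >> 4) && (ldr & mda & 0xf) != 0;
}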
808 
809 /* The KVM local APIC implementation has two quirks:
810  *
811  *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
812  *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
813  *    KVM doesn't do that aliasing.
814  *
815  *  - in-kernel IOAPIC messages have to be delivered directly to
816  *    x2APIC, because the kernel does not support interrupt remapping.
817  *    In order to support broadcast without interrupt remapping, x2APIC
818  *    rewrites the destination of non-IPI messages from APIC_BROADCAST
819  *    to X2APIC_BROADCAST.
820  *
821  * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
822  * important when userspace wants to use x2APIC-format MSIs, because
823  * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
824  */
825 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
826 		struct kvm_lapic *source, struct kvm_lapic *target)
827 {
828 	bool ipi = source != NULL;
829 
830 	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
831 	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
832 		return X2APIC_BROADCAST;
833 
834 	return dest_id;
835 }
836 
837 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
838 			   int shorthand, unsigned int dest, int dest_mode)
839 {
840 	struct kvm_lapic *target = vcpu->arch.apic;
841 	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
842 
843 	ASSERT(target);
844 	switch (shorthand) {
845 	case APIC_DEST_NOSHORT:
846 		if (dest_mode == APIC_DEST_PHYSICAL)
847 			return kvm_apic_match_physical_addr(target, mda);
848 		else
849 			return kvm_apic_match_logical_addr(target, mda);
850 	case APIC_DEST_SELF:
851 		return target == source;
852 	case APIC_DEST_ALLINC:
853 		return true;
854 	case APIC_DEST_ALLBUT:
855 		return target != source;
856 	default:
857 		return false;
858 	}
859 }
860 EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
861 
862 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
863 		       const unsigned long *bitmap, u32 bitmap_size)
864 {
865 	u32 mod;
866 	int i, idx = -1;
867 
868 	mod = vector % dest_vcpus;
869 
870 	for (i = 0; i <= mod; i++) {
871 		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
872 		BUG_ON(idx == bitmap_size);
873 	}
874 
875 	return idx;
876 }
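/*
 * Worked example (illustrative, not kernel code): with vector hashing the
 * target is the (vector % dest_vcpus)-th set bit of the destination bitmap,
 * which is what kvm_vector_to_index() computes.  E.g. bitmap 0b1011 (three
 * candidates) and vector 0x31: 0x31 % 3 == 1, so bit 1 is chosen.
 */
static int demo_vector_to_index(unsigned int vector, unsigned int dest_vcpus,
				unsigned int bitmap16)
{
	unsigned int skip = vector % dest_vcpus;
	int i;

	for (i = 0; i < 16; i++) {
		if (!(bitmap16 & (1u << i)))
			continue;
		if (skip-- == 0)
			return i;
	}
	return -1;
}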
877 
878 static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
879 {
880 	if (!kvm->arch.disabled_lapic_found) {
881 		kvm->arch.disabled_lapic_found = true;
882 		printk(KERN_INFO
883 		       "Disabled LAPIC found during irq injection\n");
884 	}
885 }
886 
887 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
888 		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
889 {
890 	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
891 		if ((irq->dest_id == APIC_BROADCAST &&
892 				map->mode != KVM_APIC_MODE_X2APIC))
893 			return true;
894 		if (irq->dest_id == X2APIC_BROADCAST)
895 			return true;
896 	} else {
897 		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
898 		if (irq->dest_id == (x2apic_ipi ?
899 		                     X2APIC_BROADCAST : APIC_BROADCAST))
900 			return true;
901 	}
902 
903 	return false;
904 }
905 
906 /* Return true if the interrupt can be handled by using *bitmap as an index
907  * mask for valid destinations in the *dst array.
908  * Return false if kvm_apic_map_get_dest_lapic() did nothing useful.
909  * Note: we may have zero kvm_lapic destinations when we return true, which
910  * means that the interrupt should be dropped.  In this case, *bitmap would be
911  * zero and *dst undefined.
912  */
913 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
914 		struct kvm_lapic **src, struct kvm_lapic_irq *irq,
915 		struct kvm_apic_map *map, struct kvm_lapic ***dst,
916 		unsigned long *bitmap)
917 {
918 	int i, lowest;
919 
920 	if (irq->shorthand == APIC_DEST_SELF && src) {
921 		*dst = src;
922 		*bitmap = 1;
923 		return true;
924 	} else if (irq->shorthand)
925 		return false;
926 
927 	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
928 		return false;
929 
930 	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
931 		if (irq->dest_id > map->max_apic_id) {
932 			*bitmap = 0;
933 		} else {
934 			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
935 			*dst = &map->phys_map[dest_id];
936 			*bitmap = 1;
937 		}
938 		return true;
939 	}
940 
941 	*bitmap = 0;
942 	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
943 				(u16 *)bitmap))
944 		return false;
945 
946 	if (!kvm_lowest_prio_delivery(irq))
947 		return true;
948 
949 	if (!kvm_vector_hashing_enabled()) {
950 		lowest = -1;
951 		for_each_set_bit(i, bitmap, 16) {
952 			if (!(*dst)[i])
953 				continue;
954 			if (lowest < 0)
955 				lowest = i;
956 			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
957 						(*dst)[lowest]->vcpu) < 0)
958 				lowest = i;
959 		}
960 	} else {
961 		if (!*bitmap)
962 			return true;
963 
964 		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
965 				bitmap, 16);
966 
967 		if (!(*dst)[lowest]) {
968 			kvm_apic_disabled_lapic_found(kvm);
969 			*bitmap = 0;
970 			return true;
971 		}
972 	}
973 
974 	*bitmap = (lowest >= 0) ? 1 << lowest : 0;
975 
976 	return true;
977 }
978 
979 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
980 		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
981 {
982 	struct kvm_apic_map *map;
983 	unsigned long bitmap;
984 	struct kvm_lapic **dst = NULL;
985 	int i;
986 	bool ret;
987 
988 	*r = -1;
989 
990 	if (irq->shorthand == APIC_DEST_SELF) {
991 		if (KVM_BUG_ON(!src, kvm)) {
992 			*r = 0;
993 			return true;
994 		}
995 		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
996 		return true;
997 	}
998 
999 	rcu_read_lock();
1000 	map = rcu_dereference(kvm->arch.apic_map);
1001 
1002 	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1003 	if (ret) {
1004 		*r = 0;
1005 		for_each_set_bit(i, &bitmap, 16) {
1006 			if (!dst[i])
1007 				continue;
1008 			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1009 		}
1010 	}
1011 
1012 	rcu_read_unlock();
1013 	return ret;
1014 }
1015 
1016 /*
1017  * This routine tries to handle interrupts in posted mode, here is how
1018  * it deals with different cases:
1019  * - For single-destination interrupts, handle it in posted mode
1020  * - Else if vector hashing is enabled and it is a lowest-priority
1021  *   interrupt, handle it in posted mode and use the following mechanism
1022  *   to find the destination vCPU.
1023  *	1. For lowest-priority interrupts, store all the possible
1024  *	   destination vCPUs in an array.
1025  *	2. Use "guest vector % max number of destination vCPUs" to find
1026  *	   the right destination vCPU in the array for the lowest-priority
1027  *	   interrupt.
1028  * - Otherwise, use remapped mode to inject the interrupt.
1029  */
1030 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1031 			struct kvm_vcpu **dest_vcpu)
1032 {
1033 	struct kvm_apic_map *map;
1034 	unsigned long bitmap;
1035 	struct kvm_lapic **dst = NULL;
1036 	bool ret = false;
1037 
1038 	if (irq->shorthand)
1039 		return false;
1040 
1041 	rcu_read_lock();
1042 	map = rcu_dereference(kvm->arch.apic_map);
1043 
1044 	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1045 			hweight16(bitmap) == 1) {
1046 		unsigned long i = find_first_bit(&bitmap, 16);
1047 
1048 		if (dst[i]) {
1049 			*dest_vcpu = dst[i]->vcpu;
1050 			ret = true;
1051 		}
1052 	}
1053 
1054 	rcu_read_unlock();
1055 	return ret;
1056 }
1057 
1058 /*
1059  * Add a pending IRQ into lapic.
1060  * Return 1 if successfully added and 0 if discarded.
1061  */
1062 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1063 			     int vector, int level, int trig_mode,
1064 			     struct dest_map *dest_map)
1065 {
1066 	int result = 0;
1067 	struct kvm_vcpu *vcpu = apic->vcpu;
1068 
1069 	trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1070 				  trig_mode, vector);
1071 	switch (delivery_mode) {
1072 	case APIC_DM_LOWEST:
1073 		vcpu->arch.apic_arb_prio++;
1074 		fallthrough;
1075 	case APIC_DM_FIXED:
1076 		if (unlikely(trig_mode && !level))
1077 			break;
1078 
1079 		/* FIXME add logic for vcpu on reset */
1080 		if (unlikely(!apic_enabled(apic)))
1081 			break;
1082 
1083 		result = 1;
1084 
1085 		if (dest_map) {
1086 			__set_bit(vcpu->vcpu_id, dest_map->map);
1087 			dest_map->vectors[vcpu->vcpu_id] = vector;
1088 		}
1089 
1090 		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1091 			if (trig_mode)
1092 				kvm_lapic_set_vector(vector,
1093 						     apic->regs + APIC_TMR);
1094 			else
1095 				kvm_lapic_clear_vector(vector,
1096 						       apic->regs + APIC_TMR);
1097 		}
1098 
1099 		if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
1100 			kvm_lapic_set_irr(vector, apic);
1101 			kvm_make_request(KVM_REQ_EVENT, vcpu);
1102 			kvm_vcpu_kick(vcpu);
1103 		}
1104 		break;
1105 
1106 	case APIC_DM_REMRD:
1107 		result = 1;
1108 		vcpu->arch.pv.pv_unhalted = 1;
1109 		kvm_make_request(KVM_REQ_EVENT, vcpu);
1110 		kvm_vcpu_kick(vcpu);
1111 		break;
1112 
1113 	case APIC_DM_SMI:
1114 		result = 1;
1115 		kvm_make_request(KVM_REQ_SMI, vcpu);
1116 		kvm_vcpu_kick(vcpu);
1117 		break;
1118 
1119 	case APIC_DM_NMI:
1120 		result = 1;
1121 		kvm_inject_nmi(vcpu);
1122 		kvm_vcpu_kick(vcpu);
1123 		break;
1124 
1125 	case APIC_DM_INIT:
1126 		if (!trig_mode || level) {
1127 			result = 1;
1128 			/* assumes that there are only KVM_APIC_INIT/SIPI */
1129 			apic->pending_events = (1UL << KVM_APIC_INIT);
1130 			kvm_make_request(KVM_REQ_EVENT, vcpu);
1131 			kvm_vcpu_kick(vcpu);
1132 		}
1133 		break;
1134 
1135 	case APIC_DM_STARTUP:
1136 		result = 1;
1137 		apic->sipi_vector = vector;
1138 		/* make sure sipi_vector is visible to the receiver */
1139 		smp_wmb();
1140 		set_bit(KVM_APIC_SIPI, &apic->pending_events);
1141 		kvm_make_request(KVM_REQ_EVENT, vcpu);
1142 		kvm_vcpu_kick(vcpu);
1143 		break;
1144 
1145 	case APIC_DM_EXTINT:
1146 		/*
1147 		 * Should only be called by kvm_apic_local_deliver() with LVT0,
1148 		 * before NMI watchdog was enabled. Already handled by
1149 		 * kvm_apic_accept_pic_intr().
1150 		 */
1151 		break;
1152 
1153 	default:
1154 		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1155 		       delivery_mode);
1156 		break;
1157 	}
1158 	return result;
1159 }
1160 
1161 /*
1162  * This routine identifies the destination vcpu mask meant to receive the
1163  * IOAPIC interrupt. It either uses kvm_apic_map_get_dest_lapic() to find
1164  * the destination vcpu array and set the bitmap, or it traverses each
1165  * available vcpu to identify the same.
1166  */
1167 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1168 			      unsigned long *vcpu_bitmap)
1169 {
1170 	struct kvm_lapic **dest_vcpu = NULL;
1171 	struct kvm_lapic *src = NULL;
1172 	struct kvm_apic_map *map;
1173 	struct kvm_vcpu *vcpu;
1174 	unsigned long bitmap;
1175 	int i, vcpu_idx;
1176 	bool ret;
1177 
1178 	rcu_read_lock();
1179 	map = rcu_dereference(kvm->arch.apic_map);
1180 
1181 	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1182 					  &bitmap);
1183 	if (ret) {
1184 		for_each_set_bit(i, &bitmap, 16) {
1185 			if (!dest_vcpu[i])
1186 				continue;
1187 			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1188 			__set_bit(vcpu_idx, vcpu_bitmap);
1189 		}
1190 	} else {
1191 		kvm_for_each_vcpu(i, vcpu, kvm) {
1192 			if (!kvm_apic_present(vcpu))
1193 				continue;
1194 			if (!kvm_apic_match_dest(vcpu, NULL,
1195 						 irq->shorthand,
1196 						 irq->dest_id,
1197 						 irq->dest_mode))
1198 				continue;
1199 			__set_bit(i, vcpu_bitmap);
1200 		}
1201 	}
1202 	rcu_read_unlock();
1203 }
1204 
1205 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1206 {
1207 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1208 }
1209 
1210 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1211 {
1212 	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1213 }
1214 
1215 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1216 {
1217 	int trigger_mode;
1218 
1219 	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
1220 	if (!kvm_ioapic_handles_vector(apic, vector))
1221 		return;
1222 
1223 	/* Request a KVM exit to inform the userspace IOAPIC. */
1224 	if (irqchip_split(apic->vcpu->kvm)) {
1225 		apic->vcpu->arch.pending_ioapic_eoi = vector;
1226 		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1227 		return;
1228 	}
1229 
1230 	if (apic_test_vector(vector, apic->regs + APIC_TMR))
1231 		trigger_mode = IOAPIC_LEVEL_TRIG;
1232 	else
1233 		trigger_mode = IOAPIC_EDGE_TRIG;
1234 
1235 	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1236 }
1237 
1238 static int apic_set_eoi(struct kvm_lapic *apic)
1239 {
1240 	int vector = apic_find_highest_isr(apic);
1241 
1242 	trace_kvm_eoi(apic, vector);
1243 
1244 	/*
1245 	 * Not every EOI write has a corresponding ISR bit set; one example
1246 	 * is when the kernel checks the timer in setup_IO_APIC().
1247 	 */
1248 	if (vector == -1)
1249 		return vector;
1250 
1251 	apic_clear_isr(vector, apic);
1252 	apic_update_ppr(apic);
1253 
1254 	if (to_hv_vcpu(apic->vcpu) &&
1255 	    test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
1256 		kvm_hv_synic_send_eoi(apic->vcpu, vector);
1257 
1258 	kvm_ioapic_send_eoi(apic, vector);
1259 	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1260 	return vector;
1261 }
1262 
1263 /*
1264  * this interface assumes a trap-like exit, which has already finished
1265  * desired side effect including vISR and vPPR update.
1266  */
1267 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1268 {
1269 	struct kvm_lapic *apic = vcpu->arch.apic;
1270 
1271 	trace_kvm_eoi(apic, vector);
1272 
1273 	kvm_ioapic_send_eoi(apic, vector);
1274 	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1275 }
1276 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1277 
1278 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1279 {
1280 	struct kvm_lapic_irq irq;
1281 
1282 	irq.vector = icr_low & APIC_VECTOR_MASK;
1283 	irq.delivery_mode = icr_low & APIC_MODE_MASK;
1284 	irq.dest_mode = icr_low & APIC_DEST_MASK;
1285 	irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1286 	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1287 	irq.shorthand = icr_low & APIC_SHORT_MASK;
1288 	irq.msi_redir_hint = false;
1289 	if (apic_x2apic_mode(apic))
1290 		irq.dest_id = icr_high;
1291 	else
1292 		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1293 
1294 	trace_kvm_apic_ipi(icr_low, irq.dest_id);
1295 
1296 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1297 }
1298 EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
1299 
1300 static u32 apic_get_tmcct(struct kvm_lapic *apic)
1301 {
1302 	ktime_t remaining, now;
1303 	s64 ns;
1304 	u32 tmcct;
1305 
1306 	ASSERT(apic != NULL);
1307 
1308 	/* if initial count is 0, current count should also be 0 */
1309 	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1310 		apic->lapic_timer.period == 0)
1311 		return 0;
1312 
1313 	now = ktime_get();
1314 	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1315 	if (ktime_to_ns(remaining) < 0)
1316 		remaining = 0;
1317 
1318 	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1319 	tmcct = div64_u64(ns,
1320 			 (APIC_BUS_CYCLE_NS * apic->divide_count));
1321 
1322 	return tmcct;
1323 }
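/*
 * Worked example (illustrative): the current-count register is the remaining
 * time divided by the per-tick period.  Assuming a 1 ns APIC bus cycle and a
 * divider of 16, 800 us remaining reads back as 800000 / (1 * 16) = 50000.
 */
static unsigned int demo_tmcct(unsigned long long remaining_ns,
			       unsigned int bus_cycle_ns,
			       unsigned int divide_count)
{
	return remaining_ns / ((unsigned long long)bus_cycle_ns * divide_count);
}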
1324 
1325 static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1326 {
1327 	struct kvm_vcpu *vcpu = apic->vcpu;
1328 	struct kvm_run *run = vcpu->run;
1329 
1330 	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1331 	run->tpr_access.rip = kvm_rip_read(vcpu);
1332 	run->tpr_access.is_write = write;
1333 }
1334 
1335 static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1336 {
1337 	if (apic->vcpu->arch.tpr_access_reporting)
1338 		__report_tpr_access(apic, write);
1339 }
1340 
1341 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1342 {
1343 	u32 val = 0;
1344 
1345 	if (offset >= LAPIC_MMIO_LENGTH)
1346 		return 0;
1347 
1348 	switch (offset) {
1349 	case APIC_ARBPRI:
1350 		break;
1351 
1352 	case APIC_TMCCT:	/* Timer CCR */
1353 		if (apic_lvtt_tscdeadline(apic))
1354 			return 0;
1355 
1356 		val = apic_get_tmcct(apic);
1357 		break;
1358 	case APIC_PROCPRI:
1359 		apic_update_ppr(apic);
1360 		val = kvm_lapic_get_reg(apic, offset);
1361 		break;
1362 	case APIC_TASKPRI:
1363 		report_tpr_access(apic, false);
1364 		fallthrough;
1365 	default:
1366 		val = kvm_lapic_get_reg(apic, offset);
1367 		break;
1368 	}
1369 
1370 	return val;
1371 }
1372 
1373 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1374 {
1375 	return container_of(dev, struct kvm_lapic, dev);
1376 }
1377 
1378 #define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
1379 #define APIC_REGS_MASK(first, count) \
1380 	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
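/*
 * Worked example (illustrative): every 16-byte-aligned APIC register maps to
 * one bit of a 64-bit mask via (reg >> 4), so e.g. the register at offset
 * 0xf0 (SPIV) corresponds to bit 15, and APIC_REGS_MASK() sets count
 * consecutive bits starting at that position.
 */
static unsigned long long demo_apic_reg_mask(unsigned int reg_offset)
{
	return 1ull << (reg_offset >> 4);	/* 0xf0 -> bit 15 */
}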
1381 
1382 int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1383 		void *data)
1384 {
1385 	unsigned char alignment = offset & 0xf;
1386 	u32 result;
1387 	/* this bitmask has a bit cleared for each reserved register */
1388 	u64 valid_reg_mask =
1389 		APIC_REG_MASK(APIC_ID) |
1390 		APIC_REG_MASK(APIC_LVR) |
1391 		APIC_REG_MASK(APIC_TASKPRI) |
1392 		APIC_REG_MASK(APIC_PROCPRI) |
1393 		APIC_REG_MASK(APIC_LDR) |
1394 		APIC_REG_MASK(APIC_DFR) |
1395 		APIC_REG_MASK(APIC_SPIV) |
1396 		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1397 		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1398 		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1399 		APIC_REG_MASK(APIC_ESR) |
1400 		APIC_REG_MASK(APIC_ICR) |
1401 		APIC_REG_MASK(APIC_ICR2) |
1402 		APIC_REG_MASK(APIC_LVTT) |
1403 		APIC_REG_MASK(APIC_LVTTHMR) |
1404 		APIC_REG_MASK(APIC_LVTPC) |
1405 		APIC_REG_MASK(APIC_LVT0) |
1406 		APIC_REG_MASK(APIC_LVT1) |
1407 		APIC_REG_MASK(APIC_LVTERR) |
1408 		APIC_REG_MASK(APIC_TMICT) |
1409 		APIC_REG_MASK(APIC_TMCCT) |
1410 		APIC_REG_MASK(APIC_TDCR);
1411 
1412 	/* ARBPRI is not valid on x2APIC */
1413 	if (!apic_x2apic_mode(apic))
1414 		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1415 
1416 	if (alignment + len > 4)
1417 		return 1;
1418 
1419 	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1420 		return 1;
1421 
1422 	result = __apic_read(apic, offset & ~0xf);
1423 
1424 	trace_kvm_apic_read(offset, result);
1425 
1426 	switch (len) {
1427 	case 1:
1428 	case 2:
1429 	case 4:
1430 		memcpy(data, (char *)&result + alignment, len);
1431 		break;
1432 	default:
1433 		printk(KERN_ERR "Local APIC read with len = %x, "
1434 		       "should be 1,2, or 4 instead\n", len);
1435 		break;
1436 	}
1437 	return 0;
1438 }
1439 EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1440 
1441 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1442 {
1443 	return addr >= apic->base_address &&
1444 		addr < apic->base_address + LAPIC_MMIO_LENGTH;
1445 }
1446 
1447 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1448 			   gpa_t address, int len, void *data)
1449 {
1450 	struct kvm_lapic *apic = to_lapic(this);
1451 	u32 offset = address - apic->base_address;
1452 
1453 	if (!apic_mmio_in_range(apic, address))
1454 		return -EOPNOTSUPP;
1455 
1456 	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1457 		if (!kvm_check_has_quirk(vcpu->kvm,
1458 					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1459 			return -EOPNOTSUPP;
1460 
1461 		memset(data, 0xff, len);
1462 		return 0;
1463 	}
1464 
1465 	kvm_lapic_reg_read(apic, offset, len, data);
1466 
1467 	return 0;
1468 }
1469 
1470 static void update_divide_count(struct kvm_lapic *apic)
1471 {
1472 	u32 tmp1, tmp2, tdcr;
1473 
1474 	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1475 	tmp1 = tdcr & 0xf;
1476 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1477 	apic->divide_count = 0x1 << (tmp2 & 0x7);
1478 }
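/*
 * Worked example (illustrative, not kernel code): the divide configuration
 * packs a 3-bit value into TDCR bits 0, 1 and 3; the divider is
 * 2^(value + 1), with the all-ones encoding wrapping to divide-by-1.
 * E.g. TDCR 0x0 -> divide by 2, TDCR 0xb -> divide by 1, matching
 * update_divide_count() above.
 */
static unsigned int demo_divide_count(unsigned int tdcr)
{
	unsigned int v = (tdcr & 0x3) | ((tdcr & 0x8) >> 1);

	return 1u << ((v + 1) & 0x7);
}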
1479 
1480 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1481 {
1482 	/*
1483 	 * Do not allow the guest to program periodic timers with small
1484 	 * interval, since the hrtimers are not throttled by the host
1485 	 * scheduler.
1486 	 */
1487 	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1488 		s64 min_period = min_timer_period_us * 1000LL;
1489 
1490 		if (apic->lapic_timer.period < min_period) {
1491 			pr_info_ratelimited(
1492 			    "kvm: vcpu %i: requested %lld ns "
1493 			    "lapic timer period limited to %lld ns\n",
1494 			    apic->vcpu->vcpu_id,
1495 			    apic->lapic_timer.period, min_period);
1496 			apic->lapic_timer.period = min_period;
1497 		}
1498 	}
1499 }
1500 
1501 static void cancel_hv_timer(struct kvm_lapic *apic);
1502 
1503 static void cancel_apic_timer(struct kvm_lapic *apic)
1504 {
1505 	hrtimer_cancel(&apic->lapic_timer.timer);
1506 	preempt_disable();
1507 	if (apic->lapic_timer.hv_timer_in_use)
1508 		cancel_hv_timer(apic);
1509 	preempt_enable();
1510 	atomic_set(&apic->lapic_timer.pending, 0);
1511 }
1512 
1513 static void apic_update_lvtt(struct kvm_lapic *apic)
1514 {
1515 	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1516 			apic->lapic_timer.timer_mode_mask;
1517 
1518 	if (apic->lapic_timer.timer_mode != timer_mode) {
1519 		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1520 				APIC_LVT_TIMER_TSCDEADLINE)) {
1521 			cancel_apic_timer(apic);
1522 			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1523 			apic->lapic_timer.period = 0;
1524 			apic->lapic_timer.tscdeadline = 0;
1525 		}
1526 		apic->lapic_timer.timer_mode = timer_mode;
1527 		limit_periodic_timer_frequency(apic);
1528 	}
1529 }
1530 
1531 /*
1532  * On APICv, this test will cause a busy wait
1533  * during a higher-priority task.
1534  */
1535 
1536 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1537 {
1538 	struct kvm_lapic *apic = vcpu->arch.apic;
1539 	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1540 
1541 	if (kvm_apic_hw_enabled(apic)) {
1542 		int vec = reg & APIC_VECTOR_MASK;
1543 		void *bitmap = apic->regs + APIC_ISR;
1544 
1545 		if (vcpu->arch.apicv_active)
1546 			bitmap = apic->regs + APIC_IRR;
1547 
1548 		if (apic_test_vector(vec, bitmap))
1549 			return true;
1550 	}
1551 	return false;
1552 }
1553 
1554 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1555 {
1556 	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1557 
1558 	/*
1559 	 * If the guest TSC is running at a different ratio than the host, then
1560 	 * convert the delay to nanoseconds to achieve an accurate delay.  Note
1561 	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1562 	 * always for VMX enabled hardware.
1563 	 */
1564 	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1565 		__delay(min(guest_cycles,
1566 			nsec_to_cycles(vcpu, timer_advance_ns)));
1567 	} else {
1568 		u64 delay_ns = guest_cycles * 1000000ULL;
1569 		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1570 		ndelay(min_t(u32, delay_ns, timer_advance_ns));
1571 	}
1572 }
1573 
1574 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1575 					      s64 advance_expire_delta)
1576 {
1577 	struct kvm_lapic *apic = vcpu->arch.apic;
1578 	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1579 	u64 ns;
1580 
1581 	/* Do not adjust for tiny fluctuations or large random spikes. */
1582 	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1583 	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1584 		return;
1585 
1586 	/* too early */
1587 	if (advance_expire_delta < 0) {
1588 		ns = -advance_expire_delta * 1000000ULL;
1589 		do_div(ns, vcpu->arch.virtual_tsc_khz);
1590 		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1591 	} else {
1592 	/* too late */
1593 		ns = advance_expire_delta * 1000000ULL;
1594 		do_div(ns, vcpu->arch.virtual_tsc_khz);
1595 		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1596 	}
1597 
1598 	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1599 		timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1600 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1601 }
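/*
 * Worked example (illustrative): the measured error is converted from guest
 * TSC cycles to nanoseconds and only 1/LAPIC_TIMER_ADVANCE_ADJUST_STEP of it
 * is applied per sample.  With an assumed 2 GHz guest TSC, a timer that
 * fired 4000 cycles early gives an error of 2000 ns and shrinks the advance
 * by 2000 / 8 = 250 ns.
 */
static long long demo_advance_step_ns(long long delta_cycles,
				      unsigned int virtual_tsc_khz,
				      unsigned int step)
{
	long long ns = delta_cycles * 1000000LL / virtual_tsc_khz;

	return ns / step;	/* negative when the timer expired early */
}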
1602 
1603 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1604 {
1605 	struct kvm_lapic *apic = vcpu->arch.apic;
1606 	u64 guest_tsc, tsc_deadline;
1607 
1608 	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1609 	apic->lapic_timer.expired_tscdeadline = 0;
1610 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1611 	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1612 
1613 	if (lapic_timer_advance_dynamic) {
1614 		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1615 		/*
1616 		 * If the timer fired early, reread the TSC to account for the
1617 		 * overhead of the above adjustment to avoid waiting longer
1618 		 * than is necessary.
1619 		 */
1620 		if (guest_tsc < tsc_deadline)
1621 			guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1622 	}
1623 
1624 	if (guest_tsc < tsc_deadline)
1625 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1626 }
1627 
1628 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1629 {
1630 	if (lapic_in_kernel(vcpu) &&
1631 	    vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1632 	    vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1633 	    lapic_timer_int_injected(vcpu))
1634 		__kvm_wait_lapic_expire(vcpu);
1635 }
1636 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1637 
1638 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1639 {
1640 	struct kvm_timer *ktimer = &apic->lapic_timer;
1641 
1642 	kvm_apic_local_deliver(apic, APIC_LVTT);
1643 	if (apic_lvtt_tscdeadline(apic)) {
1644 		ktimer->tscdeadline = 0;
1645 	} else if (apic_lvtt_oneshot(apic)) {
1646 		ktimer->tscdeadline = 0;
1647 		ktimer->target_expiration = 0;
1648 	}
1649 }
1650 
1651 static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1652 {
1653 	struct kvm_vcpu *vcpu = apic->vcpu;
1654 	struct kvm_timer *ktimer = &apic->lapic_timer;
1655 
1656 	if (atomic_read(&apic->lapic_timer.pending))
1657 		return;
1658 
1659 	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1660 		ktimer->expired_tscdeadline = ktimer->tscdeadline;
1661 
1662 	if (!from_timer_fn && vcpu->arch.apicv_active) {
1663 		WARN_ON(kvm_get_running_vcpu() != vcpu);
1664 		kvm_apic_inject_pending_timer_irqs(apic);
1665 		return;
1666 	}
1667 
1668 	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1669 		/*
1670 		 * Ensure the guest's timer has truly expired before posting an
1671 		 * interrupt.  Open code the relevant checks to avoid querying
1672 		 * lapic_timer_int_injected(), which will be false since the
1673 		 * interrupt isn't yet injected.  Waiting until after injecting
1674 		 * is not an option since that won't help a posted interrupt.
1675 		 */
1676 		if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1677 		    vcpu->arch.apic->lapic_timer.timer_advance_ns)
1678 			__kvm_wait_lapic_expire(vcpu);
1679 		kvm_apic_inject_pending_timer_irqs(apic);
1680 		return;
1681 	}
1682 
1683 	atomic_inc(&apic->lapic_timer.pending);
1684 	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
1685 	if (from_timer_fn)
1686 		kvm_vcpu_kick(vcpu);
1687 }
1688 
1689 static void start_sw_tscdeadline(struct kvm_lapic *apic)
1690 {
1691 	struct kvm_timer *ktimer = &apic->lapic_timer;
1692 	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1693 	u64 ns = 0;
1694 	ktime_t expire;
1695 	struct kvm_vcpu *vcpu = apic->vcpu;
1696 	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1697 	unsigned long flags;
1698 	ktime_t now;
1699 
1700 	if (unlikely(!tscdeadline || !this_tsc_khz))
1701 		return;
1702 
1703 	local_irq_save(flags);
1704 
1705 	now = ktime_get();
1706 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1707 
1708 	ns = (tscdeadline - guest_tsc) * 1000000ULL;
1709 	do_div(ns, this_tsc_khz);
1710 
1711 	if (likely(tscdeadline > guest_tsc) &&
1712 	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
1713 		expire = ktime_add_ns(now, ns);
1714 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1715 		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1716 	} else
1717 		apic_timer_expired(apic, false);
1718 
1719 	local_irq_restore(flags);
1720 }
1721 
1722 static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1723 {
1724 	return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1725 }
1726 
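/*
 * Rescale the remaining time of a running timer after the divide
 * configuration changed, keeping the ktime target and the TSC deadline
 * in sync.
 */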
1727 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1728 {
1729 	ktime_t now, remaining;
1730 	u64 ns_remaining_old, ns_remaining_new;
1731 
1732 	apic->lapic_timer.period =
1733 			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1734 	limit_periodic_timer_frequency(apic);
1735 
1736 	now = ktime_get();
1737 	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1738 	if (ktime_to_ns(remaining) < 0)
1739 		remaining = 0;
1740 
1741 	ns_remaining_old = ktime_to_ns(remaining);
1742 	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1743 	                                   apic->divide_count, old_divisor);
1744 
1745 	apic->lapic_timer.tscdeadline +=
1746 		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1747 		nsec_to_cycles(apic->vcpu, ns_remaining_old);
1748 	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1749 }
1750 
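/*
 * Compute the initial expiration for periodic/one-shot mode from the
 * given count register and record it both as a ktime target and as a
 * guest TSC deadline.  Returns false if the initial count is zero.
 */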
1751 static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1752 {
1753 	ktime_t now;
1754 	u64 tscl = rdtsc();
1755 	s64 deadline;
1756 
1757 	now = ktime_get();
1758 	apic->lapic_timer.period =
1759 			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1760 
1761 	if (!apic->lapic_timer.period) {
1762 		apic->lapic_timer.tscdeadline = 0;
1763 		return false;
1764 	}
1765 
1766 	limit_periodic_timer_frequency(apic);
1767 	deadline = apic->lapic_timer.period;
1768 
1769 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1770 		if (unlikely(count_reg != APIC_TMICT)) {
1771 			deadline = tmict_to_ns(apic,
1772 				     kvm_lapic_get_reg(apic, count_reg));
1773 			if (unlikely(deadline <= 0))
1774 				deadline = apic->lapic_timer.period;
1775 			else if (unlikely(deadline > apic->lapic_timer.period)) {
1776 				pr_info_ratelimited(
1777 				    "kvm: vcpu %i: requested lapic timer restore with "
1778 				    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1779 				    "Using initial count to start timer.\n",
1780 				    apic->vcpu->vcpu_id,
1781 				    count_reg,
1782 				    kvm_lapic_get_reg(apic, count_reg),
1783 				    deadline, apic->lapic_timer.period);
1784 				kvm_lapic_set_reg(apic, count_reg, 0);
1785 				deadline = apic->lapic_timer.period;
1786 			}
1787 		}
1788 	}
1789 
1790 	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1791 		nsec_to_cycles(apic->vcpu, deadline);
1792 	apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
1793 
1794 	return true;
1795 }
1796 
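/* Move the periodic timer's target expiration forward by one period. */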
1797 static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1798 {
1799 	ktime_t now = ktime_get();
1800 	u64 tscl = rdtsc();
1801 	ktime_t delta;
1802 
1803 	/*
1804 	 * Synchronize both deadlines to the same time source or
1805 	 * differences in the periods (caused by differences in the
1806 	 * underlying clocks or numerical approximation errors) will
1807 	 * cause the two to drift apart over time as the errors
1808 	 * accumulate.
1809 	 */
1810 	apic->lapic_timer.target_expiration =
1811 		ktime_add_ns(apic->lapic_timer.target_expiration,
1812 				apic->lapic_timer.period);
1813 	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1814 	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1815 		nsec_to_cycles(apic->vcpu, delta);
1816 }
1817 
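/*
 * Arm the hrtimer for periodic/one-shot mode, expiring the timer
 * immediately if the target expiration has already passed.
 */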
1818 static void start_sw_period(struct kvm_lapic *apic)
1819 {
1820 	if (!apic->lapic_timer.period)
1821 		return;
1822 
1823 	if (ktime_after(ktime_get(),
1824 			apic->lapic_timer.target_expiration)) {
1825 		apic_timer_expired(apic, false);
1826 
1827 		if (apic_lvtt_oneshot(apic))
1828 			return;
1829 
1830 		advance_periodic_target_expiration(apic);
1831 	}
1832 
1833 	hrtimer_start(&apic->lapic_timer.timer,
1834 		apic->lapic_timer.target_expiration,
1835 		HRTIMER_MODE_ABS_HARD);
1836 }
1837 
1838 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1839 {
1840 	if (!lapic_in_kernel(vcpu))
1841 		return false;
1842 
1843 	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1844 }
1845 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1846 
1847 static void cancel_hv_timer(struct kvm_lapic *apic)
1848 {
1849 	WARN_ON(preemptible());
1850 	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1851 	static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
1852 	apic->lapic_timer.hv_timer_in_use = false;
1853 }
1854 
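/*
 * Try to program the hardware timer (e.g. the VMX preemption timer) for
 * the current TSC deadline.  Returns false if the hardware timer can't
 * be used, in which case the caller falls back to the software timer.
 */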
1855 static bool start_hv_timer(struct kvm_lapic *apic)
1856 {
1857 	struct kvm_timer *ktimer = &apic->lapic_timer;
1858 	struct kvm_vcpu *vcpu = apic->vcpu;
1859 	bool expired;
1860 
1861 	WARN_ON(preemptible());
1862 	if (!kvm_can_use_hv_timer(vcpu))
1863 		return false;
1864 
1865 	if (!ktimer->tscdeadline)
1866 		return false;
1867 
1868 	if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
1869 		return false;
1870 
1871 	ktimer->hv_timer_in_use = true;
1872 	hrtimer_cancel(&ktimer->timer);
1873 
1874 	/*
1875 	 * To simplify handling the periodic timer, leave the hv timer running
1876 	 * even if the deadline timer has expired, i.e. rely on the resulting
1877 	 * VM-Exit to recompute the periodic timer's target expiration.
1878 	 */
1879 	if (!apic_lvtt_period(apic)) {
1880 		/*
1881 		 * Cancel the hv timer if the sw timer fired while the hv timer
1882 		 * was being programmed, or if the hv timer itself expired.
1883 		 */
1884 		if (atomic_read(&ktimer->pending)) {
1885 			cancel_hv_timer(apic);
1886 		} else if (expired) {
1887 			apic_timer_expired(apic, false);
1888 			cancel_hv_timer(apic);
1889 		}
1890 	}
1891 
1892 	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1893 
1894 	return true;
1895 }
1896 
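/*
 * Fall back to hrtimer-based emulation, cancelling the hardware timer
 * if it is currently in use.
 */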
1897 static void start_sw_timer(struct kvm_lapic *apic)
1898 {
1899 	struct kvm_timer *ktimer = &apic->lapic_timer;
1900 
1901 	WARN_ON(preemptible());
1902 	if (apic->lapic_timer.hv_timer_in_use)
1903 		cancel_hv_timer(apic);
1904 	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1905 		return;
1906 
1907 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1908 		start_sw_period(apic);
1909 	else if (apic_lvtt_tscdeadline(apic))
1910 		start_sw_tscdeadline(apic);
1911 	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1912 }
1913 
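/*
 * (Re)arm the APIC timer, preferring the hardware timer and falling
 * back to the software timer if it is unavailable.
 */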
1914 static void restart_apic_timer(struct kvm_lapic *apic)
1915 {
1916 	preempt_disable();
1917 
1918 	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1919 		goto out;
1920 
1921 	if (!start_hv_timer(apic))
1922 		start_sw_timer(apic);
1923 out:
1924 	preempt_enable();
1925 }
1926 
1927 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1928 {
1929 	struct kvm_lapic *apic = vcpu->arch.apic;
1930 
1931 	preempt_disable();
1932 	/* If the preempt notifier has already run, it also called apic_timer_expired */
1933 	if (!apic->lapic_timer.hv_timer_in_use)
1934 		goto out;
1935 	WARN_ON(rcuwait_active(&vcpu->wait));
1936 	apic_timer_expired(apic, false);
1937 	cancel_hv_timer(apic);
1938 
1939 	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1940 		advance_periodic_target_expiration(apic);
1941 		restart_apic_timer(apic);
1942 	}
1943 out:
1944 	preempt_enable();
1945 }
1946 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1947 
1948 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1949 {
1950 	restart_apic_timer(vcpu->arch.apic);
1951 }
1952 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1953 
1954 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1955 {
1956 	struct kvm_lapic *apic = vcpu->arch.apic;
1957 
1958 	preempt_disable();
1959 	/* Possibly the TSC deadline timer is not enabled yet */
1960 	if (apic->lapic_timer.hv_timer_in_use)
1961 		start_sw_timer(apic);
1962 	preempt_enable();
1963 }
1964 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1965 
1966 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1967 {
1968 	struct kvm_lapic *apic = vcpu->arch.apic;
1969 
1970 	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1971 	restart_apic_timer(apic);
1972 }
1973 
1974 static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
1975 {
1976 	atomic_set(&apic->lapic_timer.pending, 0);
1977 
1978 	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1979 	    && !set_target_expiration(apic, count_reg))
1980 		return;
1981 
1982 	restart_apic_timer(apic);
1983 }
1984 
1985 static void start_apic_timer(struct kvm_lapic *apic)
1986 {
1987 	__start_apic_timer(apic, APIC_TMICT);
1988 }
1989 
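/*
 * Keep a count of vCPUs whose LINT0 is configured for NMI delivery so
 * other emulated devices can tell whether an NMI watchdog is in use.
 */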
1990 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1991 {
1992 	bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1993 
1994 	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1995 		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1996 		if (lvt0_in_nmi_mode) {
1997 			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1998 		} else
1999 			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2000 	}
2001 }
2002 
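/*
 * Emulate a 32-bit write to a local APIC register.  Returns 0 on
 * success, 1 if the register or value is invalid for the current mode.
 */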
2003 int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2004 {
2005 	int ret = 0;
2006 
2007 	trace_kvm_apic_write(reg, val);
2008 
2009 	switch (reg) {
2010 	case APIC_ID:		/* Local APIC ID */
2011 		if (!apic_x2apic_mode(apic))
2012 			kvm_apic_set_xapic_id(apic, val >> 24);
2013 		else
2014 			ret = 1;
2015 		break;
2016 
2017 	case APIC_TASKPRI:
2018 		report_tpr_access(apic, true);
2019 		apic_set_tpr(apic, val & 0xff);
2020 		break;
2021 
2022 	case APIC_EOI:
2023 		apic_set_eoi(apic);
2024 		break;
2025 
2026 	case APIC_LDR:
2027 		if (!apic_x2apic_mode(apic))
2028 			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2029 		else
2030 			ret = 1;
2031 		break;
2032 
2033 	case APIC_DFR:
2034 		if (!apic_x2apic_mode(apic))
2035 			kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2036 		else
2037 			ret = 1;
2038 		break;
2039 
2040 	case APIC_SPIV: {
2041 		u32 mask = 0x3ff;
2042 		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2043 			mask |= APIC_SPIV_DIRECTED_EOI;
2044 		apic_set_spiv(apic, val & mask);
2045 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
2046 			int i;
2047 			u32 lvt_val;
2048 
2049 			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
2050 				lvt_val = kvm_lapic_get_reg(apic,
2051 						       APIC_LVTT + 0x10 * i);
2052 				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
2053 					     lvt_val | APIC_LVT_MASKED);
2054 			}
2055 			apic_update_lvtt(apic);
2056 			atomic_set(&apic->lapic_timer.pending, 0);
2057 
2058 		}
2059 		break;
2060 	}
2061 	case APIC_ICR:
2062 		/* No delay here, so we always clear the pending bit */
2063 		val &= ~(1 << 12);
2064 		kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2065 		kvm_lapic_set_reg(apic, APIC_ICR, val);
2066 		break;
2067 
2068 	case APIC_ICR2:
2069 		if (!apic_x2apic_mode(apic))
2070 			val &= 0xff000000;
2071 		kvm_lapic_set_reg(apic, APIC_ICR2, val);
2072 		break;
2073 
2074 	case APIC_LVT0:
2075 		apic_manage_nmi_watchdog(apic, val);
2076 		fallthrough;
2077 	case APIC_LVTTHMR:
2078 	case APIC_LVTPC:
2079 	case APIC_LVT1:
2080 	case APIC_LVTERR: {
2081 		/* TODO: Check vector */
2082 		size_t size;
2083 		u32 index;
2084 
2085 		if (!kvm_apic_sw_enabled(apic))
2086 			val |= APIC_LVT_MASKED;
2087 		size = ARRAY_SIZE(apic_lvt_mask);
2088 		index = array_index_nospec(
2089 				(reg - APIC_LVTT) >> 4, size);
2090 		val &= apic_lvt_mask[index];
2091 		kvm_lapic_set_reg(apic, reg, val);
2092 		break;
2093 	}
2094 
2095 	case APIC_LVTT:
2096 		if (!kvm_apic_sw_enabled(apic))
2097 			val |= APIC_LVT_MASKED;
2098 		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2099 		kvm_lapic_set_reg(apic, APIC_LVTT, val);
2100 		apic_update_lvtt(apic);
2101 		break;
2102 
2103 	case APIC_TMICT:
2104 		if (apic_lvtt_tscdeadline(apic))
2105 			break;
2106 
2107 		cancel_apic_timer(apic);
2108 		kvm_lapic_set_reg(apic, APIC_TMICT, val);
2109 		start_apic_timer(apic);
2110 		break;
2111 
2112 	case APIC_TDCR: {
2113 		uint32_t old_divisor = apic->divide_count;
2114 
2115 		kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2116 		update_divide_count(apic);
2117 		if (apic->divide_count != old_divisor &&
2118 				apic->lapic_timer.period) {
2119 			hrtimer_cancel(&apic->lapic_timer.timer);
2120 			update_target_expiration(apic, old_divisor);
2121 			restart_apic_timer(apic);
2122 		}
2123 		break;
2124 	}
2125 	case APIC_ESR:
2126 		if (apic_x2apic_mode(apic) && val != 0)
2127 			ret = 1;
2128 		break;
2129 
2130 	case APIC_SELF_IPI:
2131 		/*
2132 		 * Self-IPI exists only when x2APIC is enabled.  Bits 7:0 hold
2133 		 * the vector, everything else is reserved.
2134 		 */
2135 		if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
2136 			ret = 1;
2137 		else
2138 			kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
2139 		break;
2140 	default:
2141 		ret = 1;
2142 		break;
2143 	}
2144 
2145 	kvm_recalculate_apic_map(apic->vcpu->kvm);
2146 
2147 	return ret;
2148 }
2149 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
2150 
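/* MMIO write handler for the xAPIC register page. */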
2151 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2152 			    gpa_t address, int len, const void *data)
2153 {
2154 	struct kvm_lapic *apic = to_lapic(this);
2155 	unsigned int offset = address - apic->base_address;
2156 	u32 val;
2157 
2158 	if (!apic_mmio_in_range(apic, address))
2159 		return -EOPNOTSUPP;
2160 
2161 	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2162 		if (!kvm_check_has_quirk(vcpu->kvm,
2163 					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2164 			return -EOPNOTSUPP;
2165 
2166 		return 0;
2167 	}
2168 
2169 	/*
2170 	 * APIC registers must be aligned on a 128-bit boundary, and
2171 	 * 32/64/128-bit registers must be accessed via 32-bit reads/writes.
2172 	 * Refer to SDM 8.4.1.
2173 	 */
2174 	if (len != 4 || (offset & 0xf))
2175 		return 0;
2176 
2177 	val = *(u32*)data;
2178 
2179 	kvm_lapic_reg_write(apic, offset & 0xff0, val);
2180 
2181 	return 0;
2182 }
2183 
2184 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2185 {
2186 	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2187 }
2188 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2189 
2190 /* emulate APIC access in a trap manner */
2191 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2192 {
2193 	u32 val = 0;
2194 
2195 	/* Hardware has already done the conditional check and instruction decode. */
2196 	offset &= 0xff0;
2197 
2198 	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2199 
2200 	/* TODO: optimize to just emulate side effect w/o one more write */
2201 	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2202 }
2203 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2204 
2205 void kvm_free_lapic(struct kvm_vcpu *vcpu)
2206 {
2207 	struct kvm_lapic *apic = vcpu->arch.apic;
2208 
2209 	if (!vcpu->arch.apic)
2210 		return;
2211 
2212 	hrtimer_cancel(&apic->lapic_timer.timer);
2213 
2214 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2215 		static_branch_slow_dec_deferred(&apic_hw_disabled);
2216 
2217 	if (!apic->sw_enabled)
2218 		static_branch_slow_dec_deferred(&apic_sw_disabled);
2219 
2220 	if (apic->regs)
2221 		free_page((unsigned long)apic->regs);
2222 
2223 	kfree(apic);
2224 }
2225 
2226 /*
2227  *----------------------------------------------------------------------
2228  * LAPIC interface
2229  *----------------------------------------------------------------------
2230  */
2231 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2232 {
2233 	struct kvm_lapic *apic = vcpu->arch.apic;
2234 
2235 	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2236 		return 0;
2237 
2238 	return apic->lapic_timer.tscdeadline;
2239 }
2240 
2241 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2242 {
2243 	struct kvm_lapic *apic = vcpu->arch.apic;
2244 
2245 	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2246 		return;
2247 
2248 	hrtimer_cancel(&apic->lapic_timer.timer);
2249 	apic->lapic_timer.tscdeadline = data;
2250 	start_apic_timer(apic);
2251 }
2252 
2253 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2254 {
2255 	apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2256 }
2257 
2258 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2259 {
2260 	u64 tpr;
2261 
2262 	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2263 
2264 	return (tpr & 0xf0) >> 4;
2265 }
2266 
2267 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2268 {
2269 	u64 old_value = vcpu->arch.apic_base;
2270 	struct kvm_lapic *apic = vcpu->arch.apic;
2271 
2272 	vcpu->arch.apic_base = value;
2273 
2274 	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2275 		kvm_update_cpuid_runtime(vcpu);
2276 
2277 	if (!apic)
2278 		return;
2279 
2280 	/* update jump label if enable bit changes */
2281 	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2282 		if (value & MSR_IA32_APICBASE_ENABLE) {
2283 			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2284 			static_branch_slow_dec_deferred(&apic_hw_disabled);
2285 			/* Check if there are APF page ready requests pending */
2286 			kvm_make_request(KVM_REQ_APF_READY, vcpu);
2287 		} else {
2288 			static_branch_inc(&apic_hw_disabled.key);
2289 			atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2290 		}
2291 	}
2292 
2293 	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2294 		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2295 
2296 	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2297 		static_call(kvm_x86_set_virtual_apic_mode)(vcpu);
2298 
2299 	apic->base_address = apic->vcpu->arch.apic_base &
2300 			     MSR_IA32_APICBASE_BASE;
2301 
2302 	if ((value & MSR_IA32_APICBASE_ENABLE) &&
2303 	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
2304 		pr_warn_once("APIC base relocation is unsupported by KVM");
2305 }
2306 
2307 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2308 {
2309 	struct kvm_lapic *apic = vcpu->arch.apic;
2310 
2311 	if (vcpu->arch.apicv_active) {
2312 		/* irr_pending is always true when apicv is activated. */
2313 		apic->irr_pending = true;
2314 		apic->isr_count = 1;
2315 	} else {
2316 		apic->irr_pending = (apic_search_irr(apic) != -1);
2317 		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2318 	}
2319 	apic->highest_isr_cache = -1;
2320 }
2321 EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2322 
2323 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2324 {
2325 	struct kvm_lapic *apic = vcpu->arch.apic;
2326 	u64 msr_val;
2327 	int i;
2328 
2329 	if (!init_event) {
2330 		msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2331 		if (kvm_vcpu_is_reset_bsp(vcpu))
2332 			msr_val |= MSR_IA32_APICBASE_BSP;
2333 		kvm_lapic_set_base(vcpu, msr_val);
2334 	}
2335 
2336 	if (!apic)
2337 		return;
2338 
2339 	/* Stop the timer in case it's a reset to an active apic */
2340 	hrtimer_cancel(&apic->lapic_timer.timer);
2341 
2342 	/* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2343 	if (!init_event)
2344 		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2345 	kvm_apic_set_version(apic->vcpu);
2346 
2347 	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2348 		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2349 	apic_update_lvtt(apic);
2350 	if (kvm_vcpu_is_reset_bsp(vcpu) &&
2351 	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2352 		kvm_lapic_set_reg(apic, APIC_LVT0,
2353 			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2354 	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2355 
2356 	kvm_apic_set_dfr(apic, 0xffffffffU);
2357 	apic_set_spiv(apic, 0xff);
2358 	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2359 	if (!apic_x2apic_mode(apic))
2360 		kvm_apic_set_ldr(apic, 0);
2361 	kvm_lapic_set_reg(apic, APIC_ESR, 0);
2362 	kvm_lapic_set_reg(apic, APIC_ICR, 0);
2363 	kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2364 	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2365 	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2366 	for (i = 0; i < 8; i++) {
2367 		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2368 		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2369 		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2370 	}
2371 	kvm_apic_update_apicv(vcpu);
2372 	update_divide_count(apic);
2373 	atomic_set(&apic->lapic_timer.pending, 0);
2374 
2375 	vcpu->arch.pv_eoi.msr_val = 0;
2376 	apic_update_ppr(apic);
2377 	if (vcpu->arch.apicv_active) {
2378 		static_call(kvm_x86_apicv_post_state_restore)(vcpu);
2379 		static_call(kvm_x86_hwapic_irr_update)(vcpu, -1);
2380 		static_call(kvm_x86_hwapic_isr_update)(vcpu, -1);
2381 	}
2382 
2383 	vcpu->arch.apic_arb_prio = 0;
2384 	vcpu->arch.apic_attention = 0;
2385 
2386 	kvm_recalculate_apic_map(vcpu->kvm);
2387 }
2388 
2389 /*
2390  *----------------------------------------------------------------------
2391  * timer interface
2392  *----------------------------------------------------------------------
2393  */
2394 
2395 static bool lapic_is_periodic(struct kvm_lapic *apic)
2396 {
2397 	return apic_lvtt_period(apic);
2398 }
2399 
2400 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2401 {
2402 	struct kvm_lapic *apic = vcpu->arch.apic;
2403 
2404 	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2405 		return atomic_read(&apic->lapic_timer.pending);
2406 
2407 	return 0;
2408 }
2409 
2410 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2411 {
2412 	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2413 	int vector, mode, trig_mode;
2414 	int r;
2415 
2416 	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2417 		vector = reg & APIC_VECTOR_MASK;
2418 		mode = reg & APIC_MODE_MASK;
2419 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2420 
2421 		r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
2422 		if (r && lvt_type == APIC_LVTPC)
2423 			kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
2424 		return r;
2425 	}
2426 	return 0;
2427 }
2428 
2429 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2430 {
2431 	struct kvm_lapic *apic = vcpu->arch.apic;
2432 
2433 	if (apic)
2434 		kvm_apic_local_deliver(apic, APIC_LVT0);
2435 }
2436 
2437 static const struct kvm_io_device_ops apic_mmio_ops = {
2438 	.read     = apic_mmio_read,
2439 	.write    = apic_mmio_write,
2440 };
2441 
2442 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2443 {
2444 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2445 	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2446 
2447 	apic_timer_expired(apic, true);
2448 
2449 	if (lapic_is_periodic(apic)) {
2450 		advance_periodic_target_expiration(apic);
2451 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2452 		return HRTIMER_RESTART;
2453 	} else
2454 		return HRTIMER_NORESTART;
2455 }
2456 
2457 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2458 {
2459 	struct kvm_lapic *apic;
2460 
2461 	ASSERT(vcpu != NULL);
2462 
2463 	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2464 	if (!apic)
2465 		goto nomem;
2466 
2467 	vcpu->arch.apic = apic;
2468 
2469 	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2470 	if (!apic->regs) {
2471 		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2472 		       vcpu->vcpu_id);
2473 		goto nomem_free_apic;
2474 	}
2475 	apic->vcpu = vcpu;
2476 
2477 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2478 		     HRTIMER_MODE_ABS_HARD);
2479 	apic->lapic_timer.timer.function = apic_timer_fn;
2480 	if (timer_advance_ns == -1) {
2481 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2482 		lapic_timer_advance_dynamic = true;
2483 	} else {
2484 		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2485 		lapic_timer_advance_dynamic = false;
2486 	}
2487 
2488 	/*
2489 	 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
2490 	 * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
2491 	 */
2492 	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2493 	static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2494 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2495 
2496 	return 0;
2497 nomem_free_apic:
2498 	kfree(apic);
2499 	vcpu->arch.apic = NULL;
2500 nomem:
2501 	return -ENOMEM;
2502 }
2503 
2504 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2505 {
2506 	struct kvm_lapic *apic = vcpu->arch.apic;
2507 	u32 ppr;
2508 
2509 	if (!kvm_apic_present(vcpu))
2510 		return -1;
2511 
2512 	__apic_update_ppr(apic, &ppr);
2513 	return apic_has_interrupt_for_ppr(apic, ppr);
2514 }
2515 EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2516 
2517 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2518 {
2519 	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2520 
2521 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2522 		return 1;
2523 	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2524 	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2525 		return 1;
2526 	return 0;
2527 }
2528 
2529 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2530 {
2531 	struct kvm_lapic *apic = vcpu->arch.apic;
2532 
2533 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
2534 		kvm_apic_inject_pending_timer_irqs(apic);
2535 		atomic_set(&apic->lapic_timer.pending, 0);
2536 	}
2537 }
2538 
2539 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2540 {
2541 	int vector = kvm_apic_has_interrupt(vcpu);
2542 	struct kvm_lapic *apic = vcpu->arch.apic;
2543 	u32 ppr;
2544 
2545 	if (vector == -1)
2546 		return -1;
2547 
2548 	/*
2549 	 * We get here even with APIC virtualization enabled, if doing
2550 	 * nested virtualization and L1 runs with the "acknowledge interrupt
2551 	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
2552 	 * because the process would deliver it through the IDT.
2553 	 */
2554 
2555 	apic_clear_irr(vector, apic);
2556 	if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
2557 		/*
2558 		 * For auto-EOI interrupts, there might be another pending
2559 		 * interrupt above PPR, so check whether to raise another
2560 		 * KVM_REQ_EVENT.
2561 		 */
2562 		apic_update_ppr(apic);
2563 	} else {
2564 		/*
2565 		 * For normal interrupts, PPR has been raised and there cannot
2566 		 * be a higher-priority pending interrupt---except if there was
2567 		 * a concurrent interrupt injection, but that would have
2568 		 * triggered KVM_REQ_EVENT already.
2569 		 */
2570 		apic_set_isr(vector, apic);
2571 		__apic_update_ppr(apic, &ppr);
2572 	}
2573 
2574 	return vector;
2575 }
2576 
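/*
 * Fix up the APIC ID (and, on restore, the LDR) in the saved register
 * state when the vCPU is in x2APIC mode, depending on whether userspace
 * uses the xAPIC or the x2APIC ID format.
 */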
2577 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2578 		struct kvm_lapic_state *s, bool set)
2579 {
2580 	if (apic_x2apic_mode(vcpu->arch.apic)) {
2581 		u32 *id = (u32 *)(s->regs + APIC_ID);
2582 		u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2583 
2584 		if (vcpu->kvm->arch.x2apic_format) {
2585 			if (*id != vcpu->vcpu_id)
2586 				return -EINVAL;
2587 		} else {
2588 			if (set)
2589 				*id >>= 24;
2590 			else
2591 				*id <<= 24;
2592 		}
2593 
2594 		/* In x2APIC mode, the LDR is fixed and based on the id */
2595 		if (set)
2596 			*ldr = kvm_apic_calc_x2apic_ldr(*id);
2597 	}
2598 
2599 	return 0;
2600 }
2601 
2602 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2603 {
2604 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2605 
2606 	/*
2607 	 * Get calculated timer current count for remaining timer period (if
2608 	 * any) and store it in the returned register set.
2609 	 */
2610 	__kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2611 			    __apic_read(vcpu->arch.apic, APIC_TMCCT));
2612 
2613 	return kvm_apic_state_fixup(vcpu, s, false);
2614 }
2615 
2616 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2617 {
2618 	struct kvm_lapic *apic = vcpu->arch.apic;
2619 	int r;
2620 
2621 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2622 	/* set SPIV separately to get count of SW disabled APICs right */
2623 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2624 
2625 	r = kvm_apic_state_fixup(vcpu, s, true);
2626 	if (r) {
2627 		kvm_recalculate_apic_map(vcpu->kvm);
2628 		return r;
2629 	}
2630 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2631 
2632 	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2633 	kvm_recalculate_apic_map(vcpu->kvm);
2634 	kvm_apic_set_version(vcpu);
2635 
2636 	apic_update_ppr(apic);
2637 	cancel_apic_timer(apic);
2638 	apic->lapic_timer.expired_tscdeadline = 0;
2639 	apic_update_lvtt(apic);
2640 	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2641 	update_divide_count(apic);
2642 	__start_apic_timer(apic, APIC_TMCCT);
2643 	kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
2644 	kvm_apic_update_apicv(vcpu);
2645 	if (vcpu->arch.apicv_active) {
2646 		static_call(kvm_x86_apicv_post_state_restore)(vcpu);
2647 		static_call(kvm_x86_hwapic_irr_update)(vcpu,
2648 				apic_find_highest_irr(apic));
2649 		static_call(kvm_x86_hwapic_isr_update)(vcpu,
2650 				apic_find_highest_isr(apic));
2651 	}
2652 	kvm_make_request(KVM_REQ_EVENT, vcpu);
2653 	if (ioapic_in_kernel(vcpu->kvm))
2654 		kvm_rtc_eoi_tracking_restore_one(vcpu);
2655 
2656 	vcpu->arch.apic_arb_prio = 0;
2657 
2658 	return 0;
2659 }
2660 
2661 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2662 {
2663 	struct hrtimer *timer;
2664 
2665 	if (!lapic_in_kernel(vcpu) ||
2666 		kvm_can_post_timer_interrupt(vcpu))
2667 		return;
2668 
2669 	timer = &vcpu->arch.apic->lapic_timer.timer;
2670 	if (hrtimer_cancel(timer))
2671 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2672 }
2673 
2674 /*
2675  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2676  *
2677  * Detect whether guest triggered PV EOI since the
2678  * last entry. If yes, set EOI on the guest's behalf.
2679  * Clear PV EOI in guest memory in any case.
2680  */
2681 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2682 					struct kvm_lapic *apic)
2683 {
2684 	bool pending;
2685 	int vector;
2686 	/*
2687 	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2688 	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
2689 	 *
2690 	 * KVM_APIC_PV_EOI_PENDING is unset:
2691 	 * 	-> host disabled PV EOI.
2692 	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2693 	 * 	-> host enabled PV EOI, guest did not execute EOI yet.
2694 	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2695 	 * 	-> host enabled PV EOI, guest executed EOI.
2696 	 */
2697 	BUG_ON(!pv_eoi_enabled(vcpu));
2698 	pending = pv_eoi_get_pending(vcpu);
2699 	/*
2700 	 * Clear pending bit in any case: it will be set again on vmentry.
2701 	 * While this might not be ideal from performance point of view,
2702 	 * this makes sure pv eoi is only enabled when we know it's safe.
2703 	 */
2704 	pv_eoi_clr_pending(vcpu);
2705 	if (pending)
2706 		return;
2707 	vector = apic_set_eoi(apic);
2708 	trace_kvm_pv_eoi(apic, vector);
2709 }
2710 
2711 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2712 {
2713 	u32 data;
2714 
2715 	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2716 		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2717 
2718 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2719 		return;
2720 
2721 	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2722 				  sizeof(u32)))
2723 		return;
2724 
2725 	apic_set_tpr(vcpu->arch.apic, data & 0xff);
2726 }
2727 
2728 /*
2729  * apic_sync_pv_eoi_to_guest - called before vmentry
2730  *
2731  * Detect whether it's safe to enable PV EOI and
2732  * if yes do so.
2733  */
2734 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2735 					struct kvm_lapic *apic)
2736 {
2737 	if (!pv_eoi_enabled(vcpu) ||
2738 	    /* IRR set or many bits in ISR: could be nested. */
2739 	    apic->irr_pending ||
2740 	    /* Cache not set: could be safe but we don't bother. */
2741 	    apic->highest_isr_cache == -1 ||
2742 	    /* Need EOI to update ioapic. */
2743 	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2744 		/*
2745 		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2746 		 * so we need not do anything here.
2747 		 */
2748 		return;
2749 	}
2750 
2751 	pv_eoi_set_pending(apic->vcpu);
2752 }
2753 
2754 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2755 {
2756 	u32 data, tpr;
2757 	int max_irr, max_isr;
2758 	struct kvm_lapic *apic = vcpu->arch.apic;
2759 
2760 	apic_sync_pv_eoi_to_guest(vcpu, apic);
2761 
2762 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2763 		return;
2764 
2765 	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2766 	max_irr = apic_find_highest_irr(apic);
2767 	if (max_irr < 0)
2768 		max_irr = 0;
2769 	max_isr = apic_find_highest_isr(apic);
2770 	if (max_isr < 0)
2771 		max_isr = 0;
2772 	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2773 
2774 	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2775 				sizeof(u32));
2776 }
2777 
2778 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2779 {
2780 	if (vapic_addr) {
2781 		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2782 					&vcpu->arch.apic->vapic_cache,
2783 					vapic_addr, sizeof(u32)))
2784 			return -EINVAL;
2785 		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2786 	} else {
2787 		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2788 	}
2789 
2790 	vcpu->arch.apic->vapic_addr = vapic_addr;
2791 	return 0;
2792 }
2793 
2794 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2795 {
2796 	struct kvm_lapic *apic = vcpu->arch.apic;
2797 	u32 reg = (msr - APIC_BASE_MSR) << 4;
2798 
2799 	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2800 		return 1;
2801 
2802 	if (reg == APIC_ICR2)
2803 		return 1;
2804 
2805 	/* If this is an ICR write, write the vector before the command. */
2806 	if (reg == APIC_ICR)
2807 		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2808 	else if (data >> 32)
2809 		/* Bits 63:32 are reserved in all other registers. */
2810 		return 1;
2811 
2812 	return kvm_lapic_reg_write(apic, reg, (u32)data);
2813 }
2814 
2815 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2816 {
2817 	struct kvm_lapic *apic = vcpu->arch.apic;
2818 	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2819 
2820 	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2821 		return 1;
2822 
2823 	if (reg == APIC_DFR || reg == APIC_ICR2)
2824 		return 1;
2825 
2826 	if (kvm_lapic_reg_read(apic, reg, 4, &low))
2827 		return 1;
2828 	if (reg == APIC_ICR)
2829 		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2830 
2831 	*data = (((u64)high) << 32) | low;
2832 
2833 	return 0;
2834 }
2835 
2836 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2837 {
2838 	struct kvm_lapic *apic = vcpu->arch.apic;
2839 
2840 	if (!lapic_in_kernel(vcpu))
2841 		return 1;
2842 
2843 	/* If this is an ICR write, write the vector before the command. */
2844 	if (reg == APIC_ICR)
2845 		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2846 	else if (data >> 32)
2847 		/* Bits 63:32 are reserved in all other registers. */
2848 		return 1;
2849 
2850 	return kvm_lapic_reg_write(apic, reg, (u32)data);
2851 }
2852 
2853 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2854 {
2855 	struct kvm_lapic *apic = vcpu->arch.apic;
2856 	u32 low, high = 0;
2857 
2858 	if (!lapic_in_kernel(vcpu))
2859 		return 1;
2860 
2861 	if (kvm_lapic_reg_read(apic, reg, 4, &low))
2862 		return 1;
2863 	if (reg == APIC_ICR)
2864 		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2865 
2866 	*data = (((u64)high) << 32) | low;
2867 
2868 	return 0;
2869 }
2870 
2871 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2872 {
2873 	u64 addr = data & ~KVM_MSR_ENABLED;
2874 	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2875 	unsigned long new_len;
2876 
2877 	if (!IS_ALIGNED(addr, 4))
2878 		return 1;
2879 
2880 	vcpu->arch.pv_eoi.msr_val = data;
2881 	if (!pv_eoi_enabled(vcpu))
2882 		return 0;
2883 
2884 	if (addr == ghc->gpa && len <= ghc->len)
2885 		new_len = ghc->len;
2886 	else
2887 		new_len = len;
2888 
2889 	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2890 }
2891 
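/*
 * Process pending INIT and SIPI events for the vCPU, honouring states
 * in which INIT is latched and SIPI must be ignored.
 */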
2892 int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2893 {
2894 	struct kvm_lapic *apic = vcpu->arch.apic;
2895 	u8 sipi_vector;
2896 	int r;
2897 	unsigned long pe;
2898 
2899 	if (!lapic_in_kernel(vcpu))
2900 		return 0;
2901 
2902 	/*
2903 	 * Read pending events before calling the check_events
2904 	 * callback.
2905 	 */
2906 	pe = smp_load_acquire(&apic->pending_events);
2907 	if (!pe)
2908 		return 0;
2909 
2910 	if (is_guest_mode(vcpu)) {
2911 		r = kvm_check_nested_events(vcpu);
2912 		if (r < 0)
2913 			return r == -EBUSY ? 0 : r;
2914 		/*
2915 		 * If an event has happened and caused a vmexit,
2916 		 * we know INITs are latched and therefore
2917 		 * we will not incorrectly deliver an APIC
2918 		 * event instead of a vmexit.
2919 		 */
2920 	}
2921 
2922 	/*
2923 	 * INITs are latched while CPU is in specific states
2924 	 * (SMM, VMX root mode, SVM with GIF=0).
2925 	 * Because a CPU cannot be in these states immediately
2926 	 * after it has processed an INIT signal (and thus in
2927 	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2928 	 * and leave the INIT pending.
2929 	 */
2930 	if (kvm_vcpu_latch_init(vcpu)) {
2931 		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2932 		if (test_bit(KVM_APIC_SIPI, &pe))
2933 			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2934 		return 0;
2935 	}
2936 
2937 	if (test_bit(KVM_APIC_INIT, &pe)) {
2938 		clear_bit(KVM_APIC_INIT, &apic->pending_events);
2939 		kvm_vcpu_reset(vcpu, true);
2940 		if (kvm_vcpu_is_bsp(apic->vcpu))
2941 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2942 		else
2943 			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2944 	}
2945 	if (test_bit(KVM_APIC_SIPI, &pe)) {
2946 		clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2947 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2948 			/* evaluate pending_events before reading the vector */
2949 			smp_rmb();
2950 			sipi_vector = apic->sipi_vector;
2951 			kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2952 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2953 		}
2954 	}
2955 	return 0;
2956 }
2957 
2958 void kvm_lapic_exit(void)
2959 {
2960 	static_key_deferred_flush(&apic_hw_disabled);
2961 	WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
2962 	static_key_deferred_flush(&apic_sw_disabled);
2963 	WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
2964 }
2965