Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/lapic.c
29521 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
3
/*
4
* Local APIC virtualization
5
*
6
* Copyright (C) 2006 Qumranet, Inc.
7
* Copyright (C) 2007 Novell
8
* Copyright (C) 2007 Intel
9
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
10
*
11
* Authors:
12
* Dor Laor <[email protected]>
13
* Gregory Haskins <[email protected]>
14
* Yaozu (Eddie) Dong <[email protected]>
15
*
16
* Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17
*/
18
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20
#include <linux/kvm_host.h>
21
#include <linux/kvm.h>
22
#include <linux/mm.h>
23
#include <linux/highmem.h>
24
#include <linux/smp.h>
25
#include <linux/hrtimer.h>
26
#include <linux/io.h>
27
#include <linux/export.h>
28
#include <linux/math64.h>
29
#include <linux/slab.h>
30
#include <asm/apic.h>
31
#include <asm/processor.h>
32
#include <asm/mce.h>
33
#include <asm/msr.h>
34
#include <asm/page.h>
35
#include <asm/current.h>
36
#include <asm/apicdef.h>
37
#include <asm/delay.h>
38
#include <linux/atomic.h>
39
#include <linux/jump_label.h>
40
#include "kvm_cache_regs.h"
41
#include "irq.h"
42
#include "ioapic.h"
43
#include "trace.h"
44
#include "x86.h"
45
#include "xen.h"
46
#include "cpuid.h"
47
#include "hyperv.h"
48
#include "smm.h"
49
50
#ifndef CONFIG_X86_64
51
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
52
#else
53
#define mod_64(x, y) ((x) % (y))
54
#endif
55
56
/* 14 is the version for Xeon and Pentium 8.4.8*/
57
#define APIC_VERSION 0x14UL
58
#define LAPIC_MMIO_LENGTH (1 << 12)
59
60
/*
61
* Enable local APIC timer advancement (tscdeadline mode only) with adaptive
62
* tuning. When enabled, KVM programs the host timer event to fire early, i.e.
63
* before the deadline expires, to account for the delay between taking the
64
* VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
65
* the guest, i.e. so that the interrupt arrives in the guest with minimal
66
* latency relative to the deadline programmed by the guest.
67
*/
68
static bool lapic_timer_advance __read_mostly = true;
69
module_param(lapic_timer_advance, bool, 0444);
70
71
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
72
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
73
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
74
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
75
/* step-by-step approximation to mitigate fluctuation */
76
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
77
78
static bool __read_mostly vector_hashing_enabled = true;
79
module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444);
80
81
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
82
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
83
84
static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
85
{
86
apic_set_reg(apic->regs, reg_off, val);
87
}
88
89
static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
90
{
91
return apic_get_reg64(apic->regs, reg);
92
}
93
94
static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
95
int reg, u64 val)
96
{
97
apic_set_reg64(apic->regs, reg, val);
98
}
99
100
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
101
{
102
struct kvm_lapic *apic = vcpu->arch.apic;
103
104
return apic_test_vector(vector, apic->regs + APIC_ISR) ||
105
apic_test_vector(vector, apic->regs + APIC_IRR);
106
}
107
108
__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
109
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_has_noapic_vcpu);
110
111
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
112
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
113
114
static inline int apic_enabled(struct kvm_lapic *apic)
115
{
116
return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
117
}
118
119
#define LVT_MASK \
120
(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
121
122
#define LINT_MASK \
123
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
124
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
125
126
static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
127
{
128
return apic->vcpu->vcpu_id;
129
}
130
131
static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
132
{
133
return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
134
(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
135
}
136
137
static bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
138
{
139
return kvm_x86_ops.set_hv_timer
140
&& !(kvm_mwait_in_guest(vcpu->kvm) ||
141
kvm_can_post_timer_interrupt(vcpu));
142
}
143
144
static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
145
{
146
return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
147
}
148
149
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
150
{
151
return ((id >> 4) << 16) | (1 << (id & 0xf));
152
}
153
154
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
155
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
156
switch (map->logical_mode) {
157
case KVM_APIC_MODE_SW_DISABLED:
158
/* Arbitrarily use the flat map so that @cluster isn't NULL. */
159
*cluster = map->xapic_flat_map;
160
*mask = 0;
161
return true;
162
case KVM_APIC_MODE_X2APIC: {
163
u32 offset = (dest_id >> 16) * 16;
164
u32 max_apic_id = map->max_apic_id;
165
166
if (offset <= max_apic_id) {
167
u8 cluster_size = min(max_apic_id - offset + 1, 16U);
168
169
offset = array_index_nospec(offset, map->max_apic_id + 1);
170
*cluster = &map->phys_map[offset];
171
*mask = dest_id & (0xffff >> (16 - cluster_size));
172
} else {
173
*mask = 0;
174
}
175
176
return true;
177
}
178
case KVM_APIC_MODE_XAPIC_FLAT:
179
*cluster = map->xapic_flat_map;
180
*mask = dest_id & 0xff;
181
return true;
182
case KVM_APIC_MODE_XAPIC_CLUSTER:
183
*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
184
*mask = dest_id & 0xf;
185
return true;
186
case KVM_APIC_MODE_MAP_DISABLED:
187
return false;
188
default:
189
WARN_ON_ONCE(1);
190
return false;
191
}
192
}
193
194
static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
195
struct kvm_vcpu *vcpu,
196
bool *xapic_id_mismatch)
197
{
198
struct kvm_lapic *apic = vcpu->arch.apic;
199
u32 x2apic_id = kvm_x2apic_id(apic);
200
u32 xapic_id = kvm_xapic_id(apic);
201
u32 physical_id;
202
203
/*
204
* For simplicity, KVM always allocates enough space for all possible
205
* xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on
206
* without the optimized map.
207
*/
208
if (WARN_ON_ONCE(xapic_id > new->max_apic_id))
209
return -EINVAL;
210
211
/*
212
* Bail if a vCPU was added and/or enabled its APIC between allocating
213
* the map and doing the actual calculations for the map. Note, KVM
214
* hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if
215
* the compiler decides to reload x2apic_id after this check.
216
*/
217
if (x2apic_id > new->max_apic_id)
218
return -E2BIG;
219
220
/*
221
* Deliberately truncate the vCPU ID when detecting a mismatched APIC
222
* ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
223
* 32-bit value. Any unwanted aliasing due to truncation results will
224
* be detected below.
225
*/
226
if (!apic_x2apic_mode(apic) && xapic_id != (u8)vcpu->vcpu_id)
227
*xapic_id_mismatch = true;
228
229
/*
230
* Apply KVM's hotplug hack if userspace has enable 32-bit APIC IDs.
231
* Allow sending events to vCPUs by their x2APIC ID even if the target
232
* vCPU is in legacy xAPIC mode, and silently ignore aliased xAPIC IDs
233
* (the x2APIC ID is truncated to 8 bits, causing IDs > 0xff to wrap
234
* and collide).
235
*
236
* Honor the architectural (and KVM's non-optimized) behavior if
237
* userspace has not enabled 32-bit x2APIC IDs. Each APIC is supposed
238
* to process messages independently. If multiple vCPUs have the same
239
* effective APIC ID, e.g. due to the x2APIC wrap or because the guest
240
* manually modified its xAPIC IDs, events targeting that ID are
241
* supposed to be recognized by all vCPUs with said ID.
242
*/
243
if (vcpu->kvm->arch.x2apic_format) {
244
/* See also kvm_apic_match_physical_addr(). */
245
if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
246
new->phys_map[x2apic_id] = apic;
247
248
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
249
new->phys_map[xapic_id] = apic;
250
} else {
251
/*
252
* Disable the optimized map if the physical APIC ID is already
253
* mapped, i.e. is aliased to multiple vCPUs. The optimized
254
* map requires a strict 1:1 mapping between IDs and vCPUs.
255
*/
256
if (apic_x2apic_mode(apic))
257
physical_id = x2apic_id;
258
else
259
physical_id = xapic_id;
260
261
if (new->phys_map[physical_id])
262
return -EINVAL;
263
264
new->phys_map[physical_id] = apic;
265
}
266
267
return 0;
268
}
269
270
static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
271
struct kvm_vcpu *vcpu)
272
{
273
struct kvm_lapic *apic = vcpu->arch.apic;
274
enum kvm_apic_logical_mode logical_mode;
275
struct kvm_lapic **cluster;
276
u16 mask;
277
u32 ldr;
278
279
if (new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
280
return;
281
282
if (!kvm_apic_sw_enabled(apic))
283
return;
284
285
ldr = kvm_lapic_get_reg(apic, APIC_LDR);
286
if (!ldr)
287
return;
288
289
if (apic_x2apic_mode(apic)) {
290
logical_mode = KVM_APIC_MODE_X2APIC;
291
} else {
292
ldr = GET_APIC_LOGICAL_ID(ldr);
293
if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
294
logical_mode = KVM_APIC_MODE_XAPIC_FLAT;
295
else
296
logical_mode = KVM_APIC_MODE_XAPIC_CLUSTER;
297
}
298
299
/*
300
* To optimize logical mode delivery, all software-enabled APICs must
301
* be configured for the same mode.
302
*/
303
if (new->logical_mode == KVM_APIC_MODE_SW_DISABLED) {
304
new->logical_mode = logical_mode;
305
} else if (new->logical_mode != logical_mode) {
306
new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
307
return;
308
}
309
310
/*
311
* In x2APIC mode, the LDR is read-only and derived directly from the
312
* x2APIC ID, thus is guaranteed to be addressable. KVM reuses
313
* kvm_apic_map.phys_map to optimize logical mode x2APIC interrupts by
314
* reversing the LDR calculation to get cluster of APICs, i.e. no
315
* additional work is required.
316
*/
317
if (apic_x2apic_mode(apic))
318
return;
319
320
if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
321
&cluster, &mask))) {
322
new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
323
return;
324
}
325
326
if (!mask)
327
return;
328
329
ldr = ffs(mask) - 1;
330
if (!is_power_of_2(mask) || cluster[ldr])
331
new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
332
else
333
cluster[ldr] = apic;
334
}
335
336
/*
337
* CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
338
*
339
* DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
340
* apic_map_lock_held.
341
*/
342
enum {
343
CLEAN,
344
UPDATE_IN_PROGRESS,
345
DIRTY
346
};
347
348
static void kvm_recalculate_apic_map(struct kvm *kvm)
349
{
350
struct kvm_apic_map *new, *old = NULL;
351
struct kvm_vcpu *vcpu;
352
unsigned long i;
353
u32 max_id = 255; /* enough space for any xAPIC ID */
354
bool xapic_id_mismatch;
355
int r;
356
357
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
358
if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
359
return;
360
361
WARN_ONCE(!irqchip_in_kernel(kvm),
362
"Dirty APIC map without an in-kernel local APIC");
363
364
mutex_lock(&kvm->arch.apic_map_lock);
365
366
retry:
367
/*
368
* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
369
* or the APIC registers (if dirty). Note, on retry the map may have
370
* not yet been marked dirty by whatever task changed a vCPU's x2APIC
371
* ID, i.e. the map may still show up as in-progress. In that case
372
* this task still needs to retry and complete its calculation.
373
*/
374
if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
375
DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
376
/* Someone else has updated the map. */
377
mutex_unlock(&kvm->arch.apic_map_lock);
378
return;
379
}
380
381
/*
382
* Reset the mismatch flag between attempts so that KVM does the right
383
* thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
384
* keep max_id strictly increasing. Disallowing max_id from shrinking
385
* ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
386
* with the highest x2APIC ID is toggling its APIC on and off.
387
*/
388
xapic_id_mismatch = false;
389
390
kvm_for_each_vcpu(i, vcpu, kvm)
391
if (kvm_apic_present(vcpu))
392
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
393
394
new = kvzalloc(sizeof(struct kvm_apic_map) +
395
sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
396
GFP_KERNEL_ACCOUNT);
397
398
if (!new)
399
goto out;
400
401
new->max_apic_id = max_id;
402
new->logical_mode = KVM_APIC_MODE_SW_DISABLED;
403
404
kvm_for_each_vcpu(i, vcpu, kvm) {
405
if (!kvm_apic_present(vcpu))
406
continue;
407
408
r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
409
if (r) {
410
kvfree(new);
411
new = NULL;
412
if (r == -E2BIG) {
413
cond_resched();
414
goto retry;
415
}
416
417
goto out;
418
}
419
420
kvm_recalculate_logical_map(new, vcpu);
421
}
422
out:
423
/*
424
* The optimized map is effectively KVM's internal version of APICv,
425
* and all unwanted aliasing that results in disabling the optimized
426
* map also applies to APICv.
427
*/
428
if (!new)
429
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
430
else
431
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
432
433
if (!new || new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
434
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
435
else
436
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
437
438
if (xapic_id_mismatch)
439
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
440
else
441
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
442
443
old = rcu_dereference_protected(kvm->arch.apic_map,
444
lockdep_is_held(&kvm->arch.apic_map_lock));
445
rcu_assign_pointer(kvm->arch.apic_map, new);
446
/*
447
* Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
448
* If another update has come in, leave it DIRTY.
449
*/
450
atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
451
UPDATE_IN_PROGRESS, CLEAN);
452
mutex_unlock(&kvm->arch.apic_map_lock);
453
454
if (old)
455
kvfree_rcu(old, rcu);
456
457
kvm_make_scan_ioapic_request(kvm);
458
}
459
460
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
461
{
462
bool enabled = val & APIC_SPIV_APIC_ENABLED;
463
464
kvm_lapic_set_reg(apic, APIC_SPIV, val);
465
466
if (enabled != apic->sw_enabled) {
467
apic->sw_enabled = enabled;
468
if (enabled)
469
static_branch_slow_dec_deferred(&apic_sw_disabled);
470
else
471
static_branch_inc(&apic_sw_disabled.key);
472
473
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
474
}
475
476
/* Check if there are APF page ready requests pending */
477
if (enabled) {
478
kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
479
kvm_xen_sw_enable_lapic(apic->vcpu);
480
}
481
}
482
483
static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
484
{
485
kvm_lapic_set_reg(apic, APIC_ID, id << 24);
486
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
487
}
488
489
static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
490
{
491
kvm_lapic_set_reg(apic, APIC_LDR, id);
492
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
493
}
494
495
static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
496
{
497
kvm_lapic_set_reg(apic, APIC_DFR, val);
498
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
499
}
500
501
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
502
{
503
u32 ldr = kvm_apic_calc_x2apic_ldr(id);
504
505
WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
506
507
kvm_lapic_set_reg(apic, APIC_ID, id);
508
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
509
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
510
}
511
512
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
513
{
514
return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
515
}
516
517
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
518
{
519
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
520
}
521
522
static inline int apic_lvtt_period(struct kvm_lapic *apic)
523
{
524
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
525
}
526
527
static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
528
{
529
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
530
}
531
532
static inline int apic_lvt_nmi_mode(u32 lvt_val)
533
{
534
return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
535
}
536
537
static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
538
{
539
return apic->nr_lvt_entries > lvt_index;
540
}
541
542
static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
543
{
544
return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
545
}
546
547
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
548
{
549
struct kvm_lapic *apic = vcpu->arch.apic;
550
u32 v = 0;
551
552
if (!lapic_in_kernel(vcpu))
553
return;
554
555
v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
556
557
/*
558
* KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
559
* which doesn't have EOI register; Some buggy OSes (e.g. Windows with
560
* Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
561
* version first and level-triggered interrupts never get EOIed in
562
* IOAPIC.
563
*/
564
if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) &&
565
!ioapic_in_kernel(vcpu->kvm))
566
v |= APIC_LVR_DIRECTED_EOI;
567
kvm_lapic_set_reg(apic, APIC_LVR, v);
568
}
569
570
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
571
{
572
int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
573
struct kvm_lapic *apic = vcpu->arch.apic;
574
int i;
575
576
if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
577
return;
578
579
/* Initialize/mask any "new" LVT entries. */
580
for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
581
kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
582
583
apic->nr_lvt_entries = nr_lvt_entries;
584
585
/* The number of LVT entries is reflected in the version register. */
586
kvm_apic_set_version(vcpu);
587
}
588
589
static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
590
[LVT_TIMER] = LVT_MASK, /* timer mode mask added at runtime */
591
[LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
592
[LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
593
[LVT_LINT0] = LINT_MASK,
594
[LVT_LINT1] = LINT_MASK,
595
[LVT_ERROR] = LVT_MASK,
596
[LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
597
};
598
599
static u8 count_vectors(void *bitmap)
600
{
601
int vec;
602
u32 *reg;
603
u8 count = 0;
604
605
for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
606
reg = bitmap + APIC_VECTOR_TO_REG_OFFSET(vec);
607
count += hweight32(*reg);
608
}
609
610
return count;
611
}
612
613
bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr)
614
{
615
unsigned long pir_vals[NR_PIR_WORDS];
616
u32 *__pir = (void *)pir_vals;
617
u32 i, vec;
618
u32 irr_val, prev_irr_val;
619
int max_updated_irr;
620
621
max_updated_irr = -1;
622
*max_irr = -1;
623
624
if (!pi_harvest_pir(pir, pir_vals))
625
return false;
626
627
for (i = vec = 0; i <= 7; i++, vec += 32) {
628
u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
629
630
irr_val = READ_ONCE(*p_irr);
631
632
if (__pir[i]) {
633
prev_irr_val = irr_val;
634
do {
635
irr_val = prev_irr_val | __pir[i];
636
} while (prev_irr_val != irr_val &&
637
!try_cmpxchg(p_irr, &prev_irr_val, irr_val));
638
639
if (prev_irr_val != irr_val)
640
max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
641
}
642
if (irr_val)
643
*max_irr = __fls(irr_val) + vec;
644
}
645
646
return ((max_updated_irr != -1) &&
647
(max_updated_irr == *max_irr));
648
}
649
EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_apic_update_irr);
650
651
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr)
652
{
653
struct kvm_lapic *apic = vcpu->arch.apic;
654
bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
655
656
if (unlikely(!apic->apicv_active && irr_updated))
657
apic->irr_pending = true;
658
return irr_updated;
659
}
660
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_irr);
661
662
static inline int apic_search_irr(struct kvm_lapic *apic)
663
{
664
return apic_find_highest_vector(apic->regs + APIC_IRR);
665
}
666
667
static inline int apic_find_highest_irr(struct kvm_lapic *apic)
668
{
669
int result;
670
671
/*
672
* Note that irr_pending is just a hint. It will be always
673
* true with virtual interrupt delivery enabled.
674
*/
675
if (!apic->irr_pending)
676
return -1;
677
678
result = apic_search_irr(apic);
679
ASSERT(result == -1 || result >= 16);
680
681
return result;
682
}
683
684
static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
685
{
686
if (unlikely(apic->apicv_active)) {
687
apic_clear_vector(vec, apic->regs + APIC_IRR);
688
} else {
689
apic->irr_pending = false;
690
apic_clear_vector(vec, apic->regs + APIC_IRR);
691
if (apic_search_irr(apic) != -1)
692
apic->irr_pending = true;
693
}
694
}
695
696
void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
697
{
698
apic_clear_irr(vec, vcpu->arch.apic);
699
}
700
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_clear_irr);
701
702
static void *apic_vector_to_isr(int vec, struct kvm_lapic *apic)
703
{
704
return apic->regs + APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(vec);
705
}
706
707
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
708
{
709
if (__test_and_set_bit(APIC_VECTOR_TO_BIT_NUMBER(vec),
710
apic_vector_to_isr(vec, apic)))
711
return;
712
713
/*
714
* With APIC virtualization enabled, all caching is disabled
715
* because the processor can modify ISR under the hood. Instead
716
* just set SVI.
717
*/
718
if (unlikely(apic->apicv_active))
719
kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec);
720
else {
721
++apic->isr_count;
722
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
723
/*
724
* ISR (in service register) bit is set when injecting an interrupt.
725
* The highest vector is injected. Thus the latest bit set matches
726
* the highest bit in ISR.
727
*/
728
apic->highest_isr_cache = vec;
729
}
730
}
731
732
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
733
{
734
int result;
735
736
/*
737
* Note that isr_count is always 1, and highest_isr_cache
738
* is always -1, with APIC virtualization enabled.
739
*/
740
if (!apic->isr_count)
741
return -1;
742
if (likely(apic->highest_isr_cache != -1))
743
return apic->highest_isr_cache;
744
745
result = apic_find_highest_vector(apic->regs + APIC_ISR);
746
ASSERT(result == -1 || result >= 16);
747
748
return result;
749
}
750
751
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
752
{
753
if (!__test_and_clear_bit(APIC_VECTOR_TO_BIT_NUMBER(vec),
754
apic_vector_to_isr(vec, apic)))
755
return;
756
757
/*
758
* We do get here for APIC virtualization enabled if the guest
759
* uses the Hyper-V APIC enlightenment. In this case we may need
760
* to trigger a new interrupt delivery by writing the SVI field;
761
* on the other hand isr_count and highest_isr_cache are unused
762
* and must be left alone.
763
*/
764
if (unlikely(apic->apicv_active))
765
kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic));
766
else {
767
--apic->isr_count;
768
BUG_ON(apic->isr_count < 0);
769
apic->highest_isr_cache = -1;
770
}
771
}
772
773
void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
774
{
775
struct kvm_lapic *apic = vcpu->arch.apic;
776
777
if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active)
778
return;
779
780
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
781
}
782
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_hwapic_isr);
783
784
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
785
{
786
/* This may race with setting of irr in __apic_accept_irq() and
787
* value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
788
* will cause vmexit immediately and the value will be recalculated
789
* on the next vmentry.
790
*/
791
return apic_find_highest_irr(vcpu->arch.apic);
792
}
793
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_find_highest_irr);
794
795
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
796
int vector, int level, int trig_mode,
797
struct dest_map *dest_map);
798
799
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
800
struct dest_map *dest_map)
801
{
802
struct kvm_lapic *apic = vcpu->arch.apic;
803
804
return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
805
irq->level, irq->trig_mode, dest_map);
806
}
807
808
static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
809
struct kvm_lapic_irq *irq, u32 min)
810
{
811
int i, count = 0;
812
struct kvm_vcpu *vcpu;
813
814
if (min > map->max_apic_id)
815
return 0;
816
817
min = array_index_nospec(min, map->max_apic_id + 1);
818
819
for_each_set_bit(i, ipi_bitmap,
820
min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
821
if (map->phys_map[min + i]) {
822
vcpu = map->phys_map[min + i]->vcpu;
823
count += kvm_apic_set_irq(vcpu, irq, NULL);
824
}
825
}
826
827
return count;
828
}
829
830
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
831
unsigned long ipi_bitmap_high, u32 min,
832
unsigned long icr, int op_64_bit)
833
{
834
struct kvm_apic_map *map;
835
struct kvm_lapic_irq irq = {0};
836
int cluster_size = op_64_bit ? 64 : 32;
837
int count;
838
839
if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
840
return -KVM_EINVAL;
841
842
irq.vector = icr & APIC_VECTOR_MASK;
843
irq.delivery_mode = icr & APIC_MODE_MASK;
844
irq.level = (icr & APIC_INT_ASSERT) != 0;
845
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
846
847
rcu_read_lock();
848
map = rcu_dereference(kvm->arch.apic_map);
849
850
count = -EOPNOTSUPP;
851
if (likely(map)) {
852
count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
853
min += cluster_size;
854
count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
855
}
856
857
rcu_read_unlock();
858
return count;
859
}
860
861
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
862
{
863
864
return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
865
sizeof(val));
866
}
867
868
static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
869
{
870
871
return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
872
sizeof(*val));
873
}
874
875
static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
876
{
877
return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
878
}
879
880
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
881
{
882
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
883
return;
884
885
__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
886
}
887
888
static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
889
{
890
u8 val;
891
892
if (pv_eoi_get_user(vcpu, &val) < 0)
893
return false;
894
895
val &= KVM_PV_EOI_ENABLED;
896
897
if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
898
return false;
899
900
/*
901
* Clear pending bit in any case: it will be set again on vmentry.
902
* While this might not be ideal from performance point of view,
903
* this makes sure pv eoi is only enabled when we know it's safe.
904
*/
905
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
906
907
return val;
908
}
909
910
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
911
{
912
int highest_irr;
913
if (kvm_x86_ops.sync_pir_to_irr)
914
highest_irr = kvm_x86_call(sync_pir_to_irr)(apic->vcpu);
915
else
916
highest_irr = apic_find_highest_irr(apic);
917
if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
918
return -1;
919
return highest_irr;
920
}
921
922
static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
923
{
924
u32 tpr, isrv, ppr, old_ppr;
925
int isr;
926
927
old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
928
tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
929
isr = apic_find_highest_isr(apic);
930
isrv = (isr != -1) ? isr : 0;
931
932
if ((tpr & 0xf0) >= (isrv & 0xf0))
933
ppr = tpr & 0xff;
934
else
935
ppr = isrv & 0xf0;
936
937
*new_ppr = ppr;
938
if (old_ppr != ppr)
939
kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
940
941
return ppr < old_ppr;
942
}
943
944
static void apic_update_ppr(struct kvm_lapic *apic)
945
{
946
u32 ppr;
947
948
if (__apic_update_ppr(apic, &ppr) &&
949
apic_has_interrupt_for_ppr(apic, ppr) != -1)
950
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
951
}
952
953
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
954
{
955
apic_update_ppr(vcpu->arch.apic);
956
}
957
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_ppr);
958
959
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
960
{
961
kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
962
apic_update_ppr(apic);
963
}
964
965
static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
966
{
967
return mda == (apic_x2apic_mode(apic) ?
968
X2APIC_BROADCAST : APIC_BROADCAST);
969
}
970
971
static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
972
{
973
if (kvm_apic_broadcast(apic, mda))
974
return true;
975
976
/*
977
* Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
978
* were in x2APIC mode if the target APIC ID can't be encoded as an
979
* xAPIC ID. This allows unique addressing of hotplugged vCPUs (which
980
* start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
981
* mode. Match the x2APIC ID if and only if the target APIC ID can't
982
* be encoded in xAPIC to avoid spurious matches against a vCPU that
983
* changed its (addressable) xAPIC ID (which is writable).
984
*/
985
if (apic_x2apic_mode(apic) || mda > 0xff)
986
return mda == kvm_x2apic_id(apic);
987
988
return mda == kvm_xapic_id(apic);
989
}
990
991
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
992
{
993
u32 logical_id;
994
995
if (kvm_apic_broadcast(apic, mda))
996
return true;
997
998
logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
999
1000
if (apic_x2apic_mode(apic))
1001
return ((logical_id >> 16) == (mda >> 16))
1002
&& (logical_id & mda & 0xffff) != 0;
1003
1004
logical_id = GET_APIC_LOGICAL_ID(logical_id);
1005
1006
switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
1007
case APIC_DFR_FLAT:
1008
return (logical_id & mda) != 0;
1009
case APIC_DFR_CLUSTER:
1010
return ((logical_id >> 4) == (mda >> 4))
1011
&& (logical_id & mda & 0xf) != 0;
1012
default:
1013
return false;
1014
}
1015
}
1016
1017
/* The KVM local APIC implementation has two quirks:
1018
*
1019
* - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
1020
* in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
1021
* KVM doesn't do that aliasing.
1022
*
1023
* - in-kernel IOAPIC messages have to be delivered directly to
1024
* x2APIC, because the kernel does not support interrupt remapping.
1025
* In order to support broadcast without interrupt remapping, x2APIC
1026
* rewrites the destination of non-IPI messages from APIC_BROADCAST
1027
* to X2APIC_BROADCAST.
1028
*
1029
* The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is
1030
* important when userspace wants to use x2APIC-format MSIs, because
1031
* APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
1032
*/
1033
static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
1034
struct kvm_lapic *source, struct kvm_lapic *target)
1035
{
1036
bool ipi = source != NULL;
1037
1038
if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
1039
!ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
1040
return X2APIC_BROADCAST;
1041
1042
return dest_id;
1043
}
1044
1045
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
1046
int shorthand, unsigned int dest, int dest_mode)
1047
{
1048
struct kvm_lapic *target = vcpu->arch.apic;
1049
u32 mda = kvm_apic_mda(vcpu, dest, source, target);
1050
1051
ASSERT(target);
1052
switch (shorthand) {
1053
case APIC_DEST_NOSHORT:
1054
if (dest_mode == APIC_DEST_PHYSICAL)
1055
return kvm_apic_match_physical_addr(target, mda);
1056
else
1057
return kvm_apic_match_logical_addr(target, mda);
1058
case APIC_DEST_SELF:
1059
return target == source;
1060
case APIC_DEST_ALLINC:
1061
return true;
1062
case APIC_DEST_ALLBUT:
1063
return target != source;
1064
default:
1065
return false;
1066
}
1067
}
1068
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_match_dest);
1069
1070
static int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
1071
const unsigned long *bitmap, u32 bitmap_size)
1072
{
1073
int idx = find_nth_bit(bitmap, bitmap_size, vector % dest_vcpus);
1074
1075
BUG_ON(idx >= bitmap_size);
1076
return idx;
1077
}
1078
1079
static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
1080
{
1081
if (!kvm->arch.disabled_lapic_found) {
1082
kvm->arch.disabled_lapic_found = true;
1083
pr_info("Disabled LAPIC found during irq injection\n");
1084
}
1085
}
1086
1087
static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
1088
struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
1089
{
1090
if (kvm->arch.x2apic_broadcast_quirk_disabled) {
1091
if ((irq->dest_id == APIC_BROADCAST &&
1092
map->logical_mode != KVM_APIC_MODE_X2APIC))
1093
return true;
1094
if (irq->dest_id == X2APIC_BROADCAST)
1095
return true;
1096
} else {
1097
bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
1098
if (irq->dest_id == (x2apic_ipi ?
1099
X2APIC_BROADCAST : APIC_BROADCAST))
1100
return true;
1101
}
1102
1103
return false;
1104
}
1105
1106
static bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
1107
{
1108
return (irq->delivery_mode == APIC_DM_LOWEST || irq->msi_redir_hint);
1109
}
1110
1111
static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1112
{
1113
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1114
}
1115
1116
/* Return true if the interrupt can be handled by using *bitmap as index mask
1117
* for valid destinations in *dst array.
1118
* Return false if kvm_apic_map_get_dest_lapic did nothing useful.
1119
* Note: we may have zero kvm_lapic destinations when we return true, which
1120
* means that the interrupt should be dropped. In this case, *bitmap would be
1121
* zero and *dst undefined.
1122
*/
1123
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
1124
struct kvm_lapic **src, struct kvm_lapic_irq *irq,
1125
struct kvm_apic_map *map, struct kvm_lapic ***dst,
1126
unsigned long *bitmap)
1127
{
1128
int i, lowest;
1129
1130
if (irq->shorthand == APIC_DEST_SELF && src) {
1131
*dst = src;
1132
*bitmap = 1;
1133
return true;
1134
} else if (irq->shorthand)
1135
return false;
1136
1137
if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
1138
return false;
1139
1140
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
1141
if (irq->dest_id > map->max_apic_id) {
1142
*bitmap = 0;
1143
} else {
1144
u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
1145
*dst = &map->phys_map[dest_id];
1146
*bitmap = 1;
1147
}
1148
return true;
1149
}
1150
1151
*bitmap = 0;
1152
if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
1153
(u16 *)bitmap))
1154
return false;
1155
1156
if (!kvm_lowest_prio_delivery(irq))
1157
return true;
1158
1159
if (!vector_hashing_enabled) {
1160
lowest = -1;
1161
for_each_set_bit(i, bitmap, 16) {
1162
if (!(*dst)[i])
1163
continue;
1164
if (lowest < 0)
1165
lowest = i;
1166
else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
1167
(*dst)[lowest]->vcpu) < 0)
1168
lowest = i;
1169
}
1170
} else {
1171
if (!*bitmap)
1172
return true;
1173
1174
lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
1175
bitmap, 16);
1176
1177
if (!(*dst)[lowest]) {
1178
kvm_apic_disabled_lapic_found(kvm);
1179
*bitmap = 0;
1180
return true;
1181
}
1182
}
1183
1184
*bitmap = (lowest >= 0) ? 1 << lowest : 0;
1185
1186
return true;
1187
}
1188
1189
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
1190
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
1191
{
1192
struct kvm_apic_map *map;
1193
unsigned long bitmap;
1194
struct kvm_lapic **dst = NULL;
1195
int i;
1196
bool ret;
1197
1198
*r = -1;
1199
1200
if (irq->shorthand == APIC_DEST_SELF) {
1201
if (KVM_BUG_ON(!src, kvm)) {
1202
*r = 0;
1203
return true;
1204
}
1205
*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
1206
return true;
1207
}
1208
1209
rcu_read_lock();
1210
map = rcu_dereference(kvm->arch.apic_map);
1211
1212
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1213
if (ret) {
1214
*r = 0;
1215
for_each_set_bit(i, &bitmap, 16) {
1216
if (!dst[i])
1217
continue;
1218
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1219
}
1220
}
1221
1222
rcu_read_unlock();
1223
return ret;
1224
}
1225
1226
/*
1227
* This routine tries to handle interrupts in posted mode, here is how
1228
* it deals with different cases:
1229
* - For single-destination interrupts, handle it in posted mode
1230
* - Else if vector hashing is enabled and it is a lowest-priority
1231
* interrupt, handle it in posted mode and use the following mechanism
1232
* to find the destination vCPU.
1233
* 1. For lowest-priority interrupts, store all the possible
1234
* destination vCPUs in an array.
1235
* 2. Use "guest vector % max number of destination vCPUs" to find
1236
* the right destination vCPU in the array for the lowest-priority
1237
* interrupt.
1238
* - Otherwise, use remapped mode to inject the interrupt.
1239
*/
1240
static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
1241
struct kvm_lapic_irq *irq,
1242
struct kvm_vcpu **dest_vcpu)
1243
{
1244
struct kvm_apic_map *map;
1245
unsigned long bitmap;
1246
struct kvm_lapic **dst = NULL;
1247
bool ret = false;
1248
1249
if (irq->shorthand)
1250
return false;
1251
1252
rcu_read_lock();
1253
map = rcu_dereference(kvm->arch.apic_map);
1254
1255
if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1256
hweight16(bitmap) == 1) {
1257
unsigned long i = find_first_bit(&bitmap, 16);
1258
1259
if (dst[i]) {
1260
*dest_vcpu = dst[i]->vcpu;
1261
ret = true;
1262
}
1263
}
1264
1265
rcu_read_unlock();
1266
return ret;
1267
}
1268
1269
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
1270
struct kvm_vcpu **dest_vcpu)
1271
{
1272
int r = 0;
1273
unsigned long i;
1274
struct kvm_vcpu *vcpu;
1275
1276
if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
1277
return true;
1278
1279
kvm_for_each_vcpu(i, vcpu, kvm) {
1280
if (!kvm_apic_present(vcpu))
1281
continue;
1282
1283
if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
1284
irq->dest_id, irq->dest_mode))
1285
continue;
1286
1287
if (++r == 2)
1288
return false;
1289
1290
*dest_vcpu = vcpu;
1291
}
1292
1293
return r == 1;
1294
}
1295
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu);
1296
1297
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
1298
struct kvm_lapic_irq *irq, struct dest_map *dest_map)
1299
{
1300
int r = -1;
1301
struct kvm_vcpu *vcpu, *lowest = NULL;
1302
unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
1303
unsigned int dest_vcpus = 0;
1304
1305
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
1306
return r;
1307
1308
if (irq->dest_mode == APIC_DEST_PHYSICAL &&
1309
irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
1310
pr_info("apic: phys broadcast and lowest prio\n");
1311
irq->delivery_mode = APIC_DM_FIXED;
1312
}
1313
1314
memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
1315
1316
kvm_for_each_vcpu(i, vcpu, kvm) {
1317
if (!kvm_apic_present(vcpu))
1318
continue;
1319
1320
if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
1321
irq->dest_id, irq->dest_mode))
1322
continue;
1323
1324
if (!kvm_lowest_prio_delivery(irq)) {
1325
if (r < 0)
1326
r = 0;
1327
r += kvm_apic_set_irq(vcpu, irq, dest_map);
1328
} else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
1329
if (!vector_hashing_enabled) {
1330
if (!lowest)
1331
lowest = vcpu;
1332
else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
1333
lowest = vcpu;
1334
} else {
1335
__set_bit(i, dest_vcpu_bitmap);
1336
dest_vcpus++;
1337
}
1338
}
1339
}
1340
1341
if (dest_vcpus != 0) {
1342
int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
1343
dest_vcpu_bitmap, KVM_MAX_VCPUS);
1344
1345
lowest = kvm_get_vcpu(kvm, idx);
1346
}
1347
1348
if (lowest)
1349
r = kvm_apic_set_irq(lowest, irq, dest_map);
1350
1351
return r;
1352
}
1353
1354
/*
1355
* Add a pending IRQ into lapic.
1356
* Return 1 if successfully added and 0 if discarded.
1357
*/
1358
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1359
int vector, int level, int trig_mode,
1360
struct dest_map *dest_map)
1361
{
1362
int result = 0;
1363
struct kvm_vcpu *vcpu = apic->vcpu;
1364
1365
trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1366
trig_mode, vector);
1367
switch (delivery_mode) {
1368
case APIC_DM_LOWEST:
1369
vcpu->arch.apic_arb_prio++;
1370
fallthrough;
1371
case APIC_DM_FIXED:
1372
if (unlikely(trig_mode && !level))
1373
break;
1374
1375
/* FIXME add logic for vcpu on reset */
1376
if (unlikely(!apic_enabled(apic)))
1377
break;
1378
1379
result = 1;
1380
1381
if (dest_map) {
1382
__set_bit(vcpu->vcpu_id, dest_map->map);
1383
dest_map->vectors[vcpu->vcpu_id] = vector;
1384
}
1385
1386
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1387
if (trig_mode)
1388
apic_set_vector(vector, apic->regs + APIC_TMR);
1389
else
1390
apic_clear_vector(vector, apic->regs + APIC_TMR);
1391
}
1392
1393
kvm_x86_call(deliver_interrupt)(apic, delivery_mode,
1394
trig_mode, vector);
1395
break;
1396
1397
case APIC_DM_REMRD:
1398
result = 1;
1399
vcpu->arch.pv.pv_unhalted = 1;
1400
kvm_make_request(KVM_REQ_EVENT, vcpu);
1401
kvm_vcpu_kick(vcpu);
1402
break;
1403
1404
case APIC_DM_SMI:
1405
if (!kvm_inject_smi(vcpu)) {
1406
kvm_vcpu_kick(vcpu);
1407
result = 1;
1408
}
1409
break;
1410
1411
case APIC_DM_NMI:
1412
result = 1;
1413
kvm_inject_nmi(vcpu);
1414
kvm_vcpu_kick(vcpu);
1415
break;
1416
1417
case APIC_DM_INIT:
1418
if (!trig_mode || level) {
1419
result = 1;
1420
/* assumes that there are only KVM_APIC_INIT/SIPI */
1421
apic->pending_events = (1UL << KVM_APIC_INIT);
1422
kvm_make_request(KVM_REQ_EVENT, vcpu);
1423
kvm_vcpu_kick(vcpu);
1424
}
1425
break;
1426
1427
case APIC_DM_STARTUP:
1428
result = 1;
1429
apic->sipi_vector = vector;
1430
/* make sure sipi_vector is visible for the receiver */
1431
smp_wmb();
1432
set_bit(KVM_APIC_SIPI, &apic->pending_events);
1433
kvm_make_request(KVM_REQ_EVENT, vcpu);
1434
kvm_vcpu_kick(vcpu);
1435
break;
1436
1437
case APIC_DM_EXTINT:
1438
/*
1439
* Should only be called by kvm_apic_local_deliver() with LVT0,
1440
* before NMI watchdog was enabled. Already handled by
1441
* kvm_apic_accept_pic_intr().
1442
*/
1443
break;
1444
1445
default:
1446
printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1447
delivery_mode);
1448
break;
1449
}
1450
return result;
1451
}
1452
1453
/*
1454
* This routine identifies the destination vcpus mask meant to receive the
1455
* IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
1456
* out the destination vcpus array and set the bitmap or it traverses to
1457
* each available vcpu to identify the same.
1458
*/
1459
void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1460
unsigned long *vcpu_bitmap)
1461
{
1462
struct kvm_lapic **dest_vcpu = NULL;
1463
struct kvm_lapic *src = NULL;
1464
struct kvm_apic_map *map;
1465
struct kvm_vcpu *vcpu;
1466
unsigned long bitmap, i;
1467
int vcpu_idx;
1468
bool ret;
1469
1470
rcu_read_lock();
1471
map = rcu_dereference(kvm->arch.apic_map);
1472
1473
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1474
&bitmap);
1475
if (ret) {
1476
for_each_set_bit(i, &bitmap, 16) {
1477
if (!dest_vcpu[i])
1478
continue;
1479
vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1480
__set_bit(vcpu_idx, vcpu_bitmap);
1481
}
1482
} else {
1483
kvm_for_each_vcpu(i, vcpu, kvm) {
1484
if (!kvm_apic_present(vcpu))
1485
continue;
1486
if (!kvm_apic_match_dest(vcpu, NULL,
1487
irq->shorthand,
1488
irq->dest_id,
1489
irq->dest_mode))
1490
continue;
1491
__set_bit(i, vcpu_bitmap);
1492
}
1493
}
1494
rcu_read_unlock();
1495
}
1496
1497
static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1498
{
1499
return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1500
}
1501
1502
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1503
{
1504
int __maybe_unused trigger_mode;
1505
1506
/* Eoi the ioapic only if the ioapic doesn't own the vector. */
1507
if (!kvm_ioapic_handles_vector(apic, vector))
1508
return;
1509
1510
/*
1511
* If the intercepted EOI is for an IRQ that was pending from previous
1512
* routing, then re-scan the I/O APIC routes as EOIs for the IRQ likely
1513
* no longer need to be intercepted.
1514
*/
1515
if (apic->vcpu->arch.highest_stale_pending_ioapic_eoi == vector)
1516
kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
1517
1518
/* Request a KVM exit to inform the userspace IOAPIC. */
1519
if (irqchip_split(apic->vcpu->kvm)) {
1520
apic->vcpu->arch.pending_ioapic_eoi = vector;
1521
kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1522
return;
1523
}
1524
1525
#ifdef CONFIG_KVM_IOAPIC
1526
if (apic_test_vector(vector, apic->regs + APIC_TMR))
1527
trigger_mode = IOAPIC_LEVEL_TRIG;
1528
else
1529
trigger_mode = IOAPIC_EDGE_TRIG;
1530
1531
kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1532
#endif
1533
}
1534
1535
static int apic_set_eoi(struct kvm_lapic *apic)
1536
{
1537
int vector = apic_find_highest_isr(apic);
1538
1539
trace_kvm_eoi(apic, vector);
1540
1541
/*
1542
* Not every write EOI will has corresponding ISR,
1543
* one example is when Kernel check timer on setup_IO_APIC
1544
*/
1545
if (vector == -1)
1546
return vector;
1547
1548
apic_clear_isr(vector, apic);
1549
apic_update_ppr(apic);
1550
1551
if (kvm_hv_synic_has_vector(apic->vcpu, vector))
1552
kvm_hv_synic_send_eoi(apic->vcpu, vector);
1553
1554
kvm_ioapic_send_eoi(apic, vector);
1555
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1556
return vector;
1557
}
1558
1559
/*
1560
* this interface assumes a trap-like exit, which has already finished
1561
* desired side effect including vISR and vPPR update.
1562
*/
1563
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1564
{
1565
struct kvm_lapic *apic = vcpu->arch.apic;
1566
1567
trace_kvm_eoi(apic, vector);
1568
1569
kvm_ioapic_send_eoi(apic, vector);
1570
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1571
}
1572
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_eoi_accelerated);
1573
1574
static void kvm_icr_to_lapic_irq(struct kvm_lapic *apic, u32 icr_low,
1575
u32 icr_high, struct kvm_lapic_irq *irq)
1576
{
1577
/* KVM has no delay and should always clear the BUSY/PENDING flag. */
1578
WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
1579
1580
irq->vector = icr_low & APIC_VECTOR_MASK;
1581
irq->delivery_mode = icr_low & APIC_MODE_MASK;
1582
irq->dest_mode = icr_low & APIC_DEST_MASK;
1583
irq->level = (icr_low & APIC_INT_ASSERT) != 0;
1584
irq->trig_mode = icr_low & APIC_INT_LEVELTRIG;
1585
irq->shorthand = icr_low & APIC_SHORT_MASK;
1586
irq->msi_redir_hint = false;
1587
if (apic_x2apic_mode(apic))
1588
irq->dest_id = icr_high;
1589
else
1590
irq->dest_id = GET_XAPIC_DEST_FIELD(icr_high);
1591
}
1592
1593
void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1594
{
1595
struct kvm_lapic_irq irq;
1596
1597
kvm_icr_to_lapic_irq(apic, icr_low, icr_high, &irq);
1598
1599
trace_kvm_apic_ipi(icr_low, irq.dest_id);
1600
1601
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1602
}
1603
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi);
1604
1605
static u32 apic_get_tmcct(struct kvm_lapic *apic)
1606
{
1607
ktime_t remaining, now;
1608
s64 ns;
1609
1610
ASSERT(apic != NULL);
1611
1612
/* if initial count is 0, current count should also be 0 */
1613
if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1614
apic->lapic_timer.period == 0)
1615
return 0;
1616
1617
now = ktime_get();
1618
remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1619
if (ktime_to_ns(remaining) < 0)
1620
remaining = 0;
1621
1622
ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1623
return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
1624
apic->divide_count));
1625
}
1626
1627
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1628
{
1629
struct kvm_vcpu *vcpu = apic->vcpu;
1630
struct kvm_run *run = vcpu->run;
1631
1632
kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1633
run->tpr_access.rip = kvm_rip_read(vcpu);
1634
run->tpr_access.is_write = write;
1635
}
1636
1637
static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1638
{
1639
if (apic->vcpu->arch.tpr_access_reporting)
1640
__report_tpr_access(apic, write);
1641
}
1642
1643
static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1644
{
1645
u32 val = 0;
1646
1647
if (offset >= LAPIC_MMIO_LENGTH)
1648
return 0;
1649
1650
switch (offset) {
1651
case APIC_ARBPRI:
1652
break;
1653
1654
case APIC_TMCCT: /* Timer CCR */
1655
if (apic_lvtt_tscdeadline(apic))
1656
return 0;
1657
1658
val = apic_get_tmcct(apic);
1659
break;
1660
case APIC_PROCPRI:
1661
apic_update_ppr(apic);
1662
val = kvm_lapic_get_reg(apic, offset);
1663
break;
1664
case APIC_TASKPRI:
1665
report_tpr_access(apic, false);
1666
fallthrough;
1667
default:
1668
val = kvm_lapic_get_reg(apic, offset);
1669
break;
1670
}
1671
1672
return val;
1673
}
1674
1675
static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1676
{
1677
return container_of(dev, struct kvm_lapic, dev);
1678
}
1679
1680
#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4))
1681
#define APIC_REGS_MASK(first, count) \
1682
(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
1683
1684
u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
1685
{
1686
/* Leave bits '0' for reserved and write-only registers. */
1687
u64 valid_reg_mask =
1688
APIC_REG_MASK(APIC_ID) |
1689
APIC_REG_MASK(APIC_LVR) |
1690
APIC_REG_MASK(APIC_TASKPRI) |
1691
APIC_REG_MASK(APIC_PROCPRI) |
1692
APIC_REG_MASK(APIC_LDR) |
1693
APIC_REG_MASK(APIC_SPIV) |
1694
APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1695
APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1696
APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1697
APIC_REG_MASK(APIC_ESR) |
1698
APIC_REG_MASK(APIC_ICR) |
1699
APIC_REG_MASK(APIC_LVTT) |
1700
APIC_REG_MASK(APIC_LVTTHMR) |
1701
APIC_REG_MASK(APIC_LVTPC) |
1702
APIC_REG_MASK(APIC_LVT0) |
1703
APIC_REG_MASK(APIC_LVT1) |
1704
APIC_REG_MASK(APIC_LVTERR) |
1705
APIC_REG_MASK(APIC_TMICT) |
1706
APIC_REG_MASK(APIC_TMCCT) |
1707
APIC_REG_MASK(APIC_TDCR);
1708
1709
if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
1710
valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
1711
1712
/* ARBPRI, DFR, and ICR2 are not valid in x2APIC mode. */
1713
if (!apic_x2apic_mode(apic))
1714
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
1715
APIC_REG_MASK(APIC_DFR) |
1716
APIC_REG_MASK(APIC_ICR2);
1717
1718
return valid_reg_mask;
1719
}
1720
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_readable_reg_mask);
1721
1722
static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1723
void *data)
1724
{
1725
unsigned char alignment = offset & 0xf;
1726
u32 result;
1727
1728
/*
1729
* WARN if KVM reads ICR in x2APIC mode, as it's an 8-byte register in
1730
* x2APIC and needs to be manually handled by the caller.
1731
*/
1732
WARN_ON_ONCE(apic_x2apic_mode(apic) && offset == APIC_ICR);
1733
1734
if (alignment + len > 4)
1735
return 1;
1736
1737
if (offset > 0x3f0 ||
1738
!(kvm_lapic_readable_reg_mask(apic) & APIC_REG_MASK(offset)))
1739
return 1;
1740
1741
result = __apic_read(apic, offset & ~0xf);
1742
1743
trace_kvm_apic_read(offset, result);
1744
1745
switch (len) {
1746
case 1:
1747
case 2:
1748
case 4:
1749
memcpy(data, (char *)&result + alignment, len);
1750
break;
1751
default:
1752
printk(KERN_ERR "Local APIC read with len = %x, "
1753
"should be 1,2, or 4 instead\n", len);
1754
break;
1755
}
1756
return 0;
1757
}
1758
1759
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1760
{
1761
return addr >= apic->base_address &&
1762
addr < apic->base_address + LAPIC_MMIO_LENGTH;
1763
}
1764
1765
static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1766
gpa_t address, int len, void *data)
1767
{
1768
struct kvm_lapic *apic = to_lapic(this);
1769
u32 offset = address - apic->base_address;
1770
1771
if (!apic_mmio_in_range(apic, address))
1772
return -EOPNOTSUPP;
1773
1774
if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1775
if (!kvm_check_has_quirk(vcpu->kvm,
1776
KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1777
return -EOPNOTSUPP;
1778
1779
memset(data, 0xff, len);
1780
return 0;
1781
}
1782
1783
kvm_lapic_reg_read(apic, offset, len, data);
1784
1785
return 0;
1786
}
1787
1788
static void update_divide_count(struct kvm_lapic *apic)
1789
{
1790
u32 tmp1, tmp2, tdcr;
1791
1792
tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1793
tmp1 = tdcr & 0xf;
1794
tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1795
apic->divide_count = 0x1 << (tmp2 & 0x7);
1796
}
1797
1798
static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1799
{
1800
/*
1801
* Do not allow the guest to program periodic timers with small
1802
* interval, since the hrtimers are not throttled by the host
1803
* scheduler.
1804
*/
1805
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1806
s64 min_period = min_timer_period_us * 1000LL;
1807
1808
if (apic->lapic_timer.period < min_period) {
1809
pr_info_once(
1810
"vcpu %i: requested %lld ns "
1811
"lapic timer period limited to %lld ns\n",
1812
apic->vcpu->vcpu_id,
1813
apic->lapic_timer.period, min_period);
1814
apic->lapic_timer.period = min_period;
1815
}
1816
}
1817
}
1818
1819
static void cancel_hv_timer(struct kvm_lapic *apic);
1820
1821
static void cancel_apic_timer(struct kvm_lapic *apic)
1822
{
1823
hrtimer_cancel(&apic->lapic_timer.timer);
1824
preempt_disable();
1825
if (apic->lapic_timer.hv_timer_in_use)
1826
cancel_hv_timer(apic);
1827
preempt_enable();
1828
atomic_set(&apic->lapic_timer.pending, 0);
1829
}
1830
1831
static void apic_update_lvtt(struct kvm_lapic *apic)
1832
{
1833
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1834
apic->lapic_timer.timer_mode_mask;
1835
1836
if (apic->lapic_timer.timer_mode != timer_mode) {
1837
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1838
APIC_LVT_TIMER_TSCDEADLINE)) {
1839
cancel_apic_timer(apic);
1840
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1841
apic->lapic_timer.period = 0;
1842
apic->lapic_timer.tscdeadline = 0;
1843
}
1844
apic->lapic_timer.timer_mode = timer_mode;
1845
limit_periodic_timer_frequency(apic);
1846
}
1847
}
1848
1849
/*
1850
* On APICv, this test will cause a busy wait
1851
* during a higher-priority task.
1852
*/
1853
1854
static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1855
{
1856
struct kvm_lapic *apic = vcpu->arch.apic;
1857
u32 reg;
1858
1859
/*
1860
* Assume a timer IRQ was "injected" if the APIC is protected. KVM's
1861
* copy of the vIRR is bogus, it's the responsibility of the caller to
1862
* precisely check whether or not a timer IRQ is pending.
1863
*/
1864
if (apic->guest_apic_protected)
1865
return true;
1866
1867
reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1868
if (kvm_apic_hw_enabled(apic)) {
1869
int vec = reg & APIC_VECTOR_MASK;
1870
void *bitmap = apic->regs + APIC_ISR;
1871
1872
if (apic->apicv_active)
1873
bitmap = apic->regs + APIC_IRR;
1874
1875
if (apic_test_vector(vec, bitmap))
1876
return true;
1877
}
1878
return false;
1879
}
1880
1881
static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1882
{
1883
u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1884
1885
/*
1886
* If the guest TSC is running at a different ratio than the host, then
1887
* convert the delay to nanoseconds to achieve an accurate delay. Note
1888
* that __delay() uses delay_tsc whenever the hardware has TSC, thus
1889
* always for VMX enabled hardware.
1890
*/
1891
if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
1892
__delay(min(guest_cycles,
1893
nsec_to_cycles(vcpu, timer_advance_ns)));
1894
} else {
1895
u64 delay_ns = guest_cycles * 1000000ULL;
1896
do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1897
ndelay(min_t(u32, delay_ns, timer_advance_ns));
1898
}
1899
}
1900
1901
static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1902
s64 advance_expire_delta)
1903
{
1904
struct kvm_lapic *apic = vcpu->arch.apic;
1905
u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1906
u64 ns;
1907
1908
/* Do not adjust for tiny fluctuations or large random spikes. */
1909
if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1910
abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1911
return;
1912
1913
/* too early */
1914
if (advance_expire_delta < 0) {
1915
ns = -advance_expire_delta * 1000000ULL;
1916
do_div(ns, vcpu->arch.virtual_tsc_khz);
1917
timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1918
} else {
1919
/* too late */
1920
ns = advance_expire_delta * 1000000ULL;
1921
do_div(ns, vcpu->arch.virtual_tsc_khz);
1922
timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1923
}
1924
1925
if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1926
timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1927
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1928
}
1929
1930
static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1931
{
1932
struct kvm_lapic *apic = vcpu->arch.apic;
1933
u64 guest_tsc, tsc_deadline;
1934
1935
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1936
apic->lapic_timer.expired_tscdeadline = 0;
1937
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1938
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1939
1940
adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1941
1942
/*
1943
* If the timer fired early, reread the TSC to account for the overhead
1944
* of the above adjustment to avoid waiting longer than is necessary.
1945
*/
1946
if (guest_tsc < tsc_deadline)
1947
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1948
1949
if (guest_tsc < tsc_deadline)
1950
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1951
}
1952
1953
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1954
{
1955
if (lapic_in_kernel(vcpu) &&
1956
vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1957
vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1958
lapic_timer_int_injected(vcpu))
1959
__kvm_wait_lapic_expire(vcpu);
1960
}
1961
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_wait_lapic_expire);
1962
1963
static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1964
{
1965
struct kvm_timer *ktimer = &apic->lapic_timer;
1966
1967
kvm_apic_local_deliver(apic, APIC_LVTT);
1968
if (apic_lvtt_tscdeadline(apic)) {
1969
ktimer->tscdeadline = 0;
1970
} else if (apic_lvtt_oneshot(apic)) {
1971
ktimer->tscdeadline = 0;
1972
ktimer->target_expiration = 0;
1973
}
1974
}
1975
1976
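/*
* Handle expiration of the LAPIC timer. Depending on the context, the
* expiration is delivered immediately (APICv, called from vCPU context),
* via a posted timer interrupt, or by pending the event and kicking the
* vCPU so the interrupt is injected on the next entry.
*/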
static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1977
{
1978
struct kvm_vcpu *vcpu = apic->vcpu;
1979
struct kvm_timer *ktimer = &apic->lapic_timer;
1980
1981
if (atomic_read(&apic->lapic_timer.pending))
1982
return;
1983
1984
if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1985
ktimer->expired_tscdeadline = ktimer->tscdeadline;
1986
1987
if (!from_timer_fn && apic->apicv_active) {
1988
WARN_ON(kvm_get_running_vcpu() != vcpu);
1989
kvm_apic_inject_pending_timer_irqs(apic);
1990
return;
1991
}
1992
1993
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1994
/*
1995
* Ensure the guest's timer has truly expired before posting an
1996
* interrupt. Open code the relevant checks to avoid querying
1997
* lapic_timer_int_injected(), which will be false since the
1998
* interrupt isn't yet injected. Waiting until after injecting
1999
* is not an option since that won't help a posted interrupt.
2000
*/
2001
if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
2002
vcpu->arch.apic->lapic_timer.timer_advance_ns)
2003
__kvm_wait_lapic_expire(vcpu);
2004
kvm_apic_inject_pending_timer_irqs(apic);
2005
return;
2006
}
2007
2008
atomic_inc(&apic->lapic_timer.pending);
2009
kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
2010
if (from_timer_fn)
2011
kvm_vcpu_kick(vcpu);
2012
}
2013
2014
static void start_sw_tscdeadline(struct kvm_lapic *apic)
2015
{
2016
struct kvm_timer *ktimer = &apic->lapic_timer;
2017
u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
2018
u64 ns = 0;
2019
ktime_t expire;
2020
struct kvm_vcpu *vcpu = apic->vcpu;
2021
u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
2022
unsigned long flags;
2023
ktime_t now;
2024
2025
if (unlikely(!tscdeadline || !this_tsc_khz))
2026
return;
2027
2028
local_irq_save(flags);
2029
2030
now = ktime_get();
2031
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
2032
2033
ns = (tscdeadline - guest_tsc) * 1000000ULL;
2034
do_div(ns, this_tsc_khz);
2035
2036
if (likely(tscdeadline > guest_tsc) &&
2037
likely(ns > apic->lapic_timer.timer_advance_ns)) {
2038
expire = ktime_add_ns(now, ns);
2039
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
2040
hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
2041
} else
2042
apic_timer_expired(apic, false);
2043
2044
local_irq_restore(flags);
2045
}
2046
2047
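/*
* Convert an initial count (TMICT) value to nanoseconds:
* ns = ticks * apic_bus_cycle_ns * divide_count, i.e. each timer tick
* takes divide_count APIC bus cycles.
*/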
static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
2048
{
2049
return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
2050
(u64)apic->divide_count;
2051
}
2052
2053
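/*
* The divide count changed while the timer is running: scale the
* remaining time by new_divisor / old_divisor and adjust both the
* hrtimer target and the TSC deadline accordingly.
*/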
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
2054
{
2055
ktime_t now, remaining;
2056
u64 ns_remaining_old, ns_remaining_new;
2057
2058
apic->lapic_timer.period =
2059
tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
2060
limit_periodic_timer_frequency(apic);
2061
2062
now = ktime_get();
2063
remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
2064
if (ktime_to_ns(remaining) < 0)
2065
remaining = 0;
2066
2067
ns_remaining_old = ktime_to_ns(remaining);
2068
ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
2069
apic->divide_count, old_divisor);
2070
2071
apic->lapic_timer.tscdeadline +=
2072
nsec_to_cycles(apic->vcpu, ns_remaining_new) -
2073
nsec_to_cycles(apic->vcpu, ns_remaining_old);
2074
apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
2075
}
2076
2077
static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
2078
{
2079
ktime_t now;
2080
u64 tscl = rdtsc();
2081
s64 deadline;
2082
2083
now = ktime_get();
2084
apic->lapic_timer.period =
2085
tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
2086
2087
if (!apic->lapic_timer.period) {
2088
apic->lapic_timer.tscdeadline = 0;
2089
return false;
2090
}
2091
2092
limit_periodic_timer_frequency(apic);
2093
deadline = apic->lapic_timer.period;
2094
2095
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
2096
if (unlikely(count_reg != APIC_TMICT)) {
2097
deadline = tmict_to_ns(apic,
2098
kvm_lapic_get_reg(apic, count_reg));
2099
if (unlikely(deadline <= 0)) {
2100
if (apic_lvtt_period(apic))
2101
deadline = apic->lapic_timer.period;
2102
else
2103
deadline = 0;
2104
}
2105
else if (unlikely(deadline > apic->lapic_timer.period)) {
2106
pr_info_ratelimited(
2107
"vcpu %i: requested lapic timer restore with "
2108
"starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
2109
"Using initial count to start timer.\n",
2110
apic->vcpu->vcpu_id,
2111
count_reg,
2112
kvm_lapic_get_reg(apic, count_reg),
2113
deadline, apic->lapic_timer.period);
2114
kvm_lapic_set_reg(apic, count_reg, 0);
2115
deadline = apic->lapic_timer.period;
2116
}
2117
}
2118
}
2119
2120
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
2121
nsec_to_cycles(apic->vcpu, deadline);
2122
apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
2123
2124
return true;
2125
}
2126
2127
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
2128
{
2129
ktime_t now = ktime_get();
2130
u64 tscl = rdtsc();
2131
ktime_t delta;
2132
2133
/*
2134
* Synchronize both deadlines to the same time source or
2135
* differences in the periods (caused by differences in the
2136
* underlying clocks or numerical approximation errors) will
2137
* cause the two to drift apart over time as the errors
2138
* accumulate.
2139
*/
2140
apic->lapic_timer.target_expiration =
2141
ktime_add_ns(apic->lapic_timer.target_expiration,
2142
apic->lapic_timer.period);
2143
delta = ktime_sub(apic->lapic_timer.target_expiration, now);
2144
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
2145
nsec_to_cycles(apic->vcpu, delta);
2146
}
2147
2148
static void start_sw_period(struct kvm_lapic *apic)
2149
{
2150
if (!apic->lapic_timer.period)
2151
return;
2152
2153
if (ktime_after(ktime_get(),
2154
apic->lapic_timer.target_expiration)) {
2155
apic_timer_expired(apic, false);
2156
2157
if (apic_lvtt_oneshot(apic))
2158
return;
2159
2160
advance_periodic_target_expiration(apic);
2161
}
2162
2163
hrtimer_start(&apic->lapic_timer.timer,
2164
apic->lapic_timer.target_expiration,
2165
HRTIMER_MODE_ABS_HARD);
2166
}
2167
2168
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
2169
{
2170
if (!lapic_in_kernel(vcpu))
2171
return false;
2172
2173
return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
2174
}
2175
2176
static void cancel_hv_timer(struct kvm_lapic *apic)
2177
{
2178
WARN_ON(preemptible());
2179
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2180
kvm_x86_call(cancel_hv_timer)(apic->vcpu);
2181
apic->lapic_timer.hv_timer_in_use = false;
2182
}
2183
2184
static bool start_hv_timer(struct kvm_lapic *apic)
2185
{
2186
struct kvm_timer *ktimer = &apic->lapic_timer;
2187
struct kvm_vcpu *vcpu = apic->vcpu;
2188
bool expired;
2189
2190
WARN_ON(preemptible());
2191
if (!kvm_can_use_hv_timer(vcpu))
2192
return false;
2193
2194
if (!ktimer->tscdeadline)
2195
return false;
2196
2197
if (kvm_x86_call(set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
2198
return false;
2199
2200
ktimer->hv_timer_in_use = true;
2201
hrtimer_cancel(&ktimer->timer);
2202
2203
/*
2204
* To simplify handling the periodic timer, leave the hv timer running
2205
* even if the deadline timer has expired, i.e. rely on the resulting
2206
* VM-Exit to recompute the periodic timer's target expiration.
2207
*/
2208
if (!apic_lvtt_period(apic)) {
2209
/*
2210
* Cancel the hv timer if the sw timer fired while the hv timer
2211
* was being programmed, or if the hv timer itself expired.
2212
*/
2213
if (atomic_read(&ktimer->pending)) {
2214
cancel_hv_timer(apic);
2215
} else if (expired) {
2216
apic_timer_expired(apic, false);
2217
cancel_hv_timer(apic);
2218
}
2219
}
2220
2221
trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
2222
2223
return true;
2224
}
2225
2226
static void start_sw_timer(struct kvm_lapic *apic)
2227
{
2228
struct kvm_timer *ktimer = &apic->lapic_timer;
2229
2230
WARN_ON(preemptible());
2231
if (apic->lapic_timer.hv_timer_in_use)
2232
cancel_hv_timer(apic);
2233
if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
2234
return;
2235
2236
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2237
start_sw_period(apic);
2238
else if (apic_lvtt_tscdeadline(apic))
2239
start_sw_tscdeadline(apic);
2240
trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
2241
}
2242
2243
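/*
* (Re)start the APIC timer, preferring the hardware-provided hv timer
* (e.g. the VMX preemption timer) and falling back to a software hrtimer
* if the hv timer can't be used.
*/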
static void restart_apic_timer(struct kvm_lapic *apic)
2244
{
2245
preempt_disable();
2246
2247
if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
2248
goto out;
2249
2250
if (!start_hv_timer(apic))
2251
start_sw_timer(apic);
2252
out:
2253
preempt_enable();
2254
}
2255
2256
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
2257
{
2258
struct kvm_lapic *apic = vcpu->arch.apic;
2259
2260
preempt_disable();
2261
/* If the preempt notifier has already run, it also called apic_timer_expired */
2262
if (!apic->lapic_timer.hv_timer_in_use)
2263
goto out;
2264
WARN_ON(kvm_vcpu_is_blocking(vcpu));
2265
apic_timer_expired(apic, false);
2266
cancel_hv_timer(apic);
2267
2268
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
2269
advance_periodic_target_expiration(apic);
2270
restart_apic_timer(apic);
2271
}
2272
out:
2273
preempt_enable();
2274
}
2275
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_expired_hv_timer);
2276
2277
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
2278
{
2279
restart_apic_timer(vcpu->arch.apic);
2280
}
2281
2282
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
2283
{
2284
struct kvm_lapic *apic = vcpu->arch.apic;
2285
2286
preempt_disable();
2287
/* Possibly the TSC deadline timer is not enabled yet */
2288
if (apic->lapic_timer.hv_timer_in_use)
2289
start_sw_timer(apic);
2290
preempt_enable();
2291
}
2292
2293
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
2294
{
2295
struct kvm_lapic *apic = vcpu->arch.apic;
2296
2297
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2298
restart_apic_timer(apic);
2299
}
2300
2301
static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
2302
{
2303
atomic_set(&apic->lapic_timer.pending, 0);
2304
2305
if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2306
&& !set_target_expiration(apic, count_reg))
2307
return;
2308
2309
restart_apic_timer(apic);
2310
}
2311
2312
static void start_apic_timer(struct kvm_lapic *apic)
2313
{
2314
__start_apic_timer(apic, APIC_TMICT);
2315
}
2316
2317
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
2318
{
2319
bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
2320
2321
if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
2322
apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
2323
if (lvt0_in_nmi_mode) {
2324
atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2325
} else
2326
atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2327
}
2328
}
2329
2330
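/*
* Map an LVT register offset to its index in apic_lvt_mask. LVTT through
* LVTERR are contiguous at a 0x10 stride; LVTCMCI sits outside that range
* and is handled explicitly. array_index_nospec() bounds the index to
* avoid speculative out-of-bounds access.
*/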
static int get_lvt_index(u32 reg)
2331
{
2332
if (reg == APIC_LVTCMCI)
2333
return LVT_CMCI;
2334
if (reg < APIC_LVTT || reg > APIC_LVTERR)
2335
return -1;
2336
return array_index_nospec(
2337
(reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES);
2338
}
2339
2340
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2341
{
2342
int ret = 0;
2343
2344
trace_kvm_apic_write(reg, val);
2345
2346
switch (reg) {
2347
case APIC_ID: /* Local APIC ID */
2348
if (!apic_x2apic_mode(apic)) {
2349
kvm_apic_set_xapic_id(apic, val >> 24);
2350
} else {
2351
ret = 1;
2352
}
2353
break;
2354
2355
case APIC_TASKPRI:
2356
report_tpr_access(apic, true);
2357
apic_set_tpr(apic, val & 0xff);
2358
break;
2359
2360
case APIC_EOI:
2361
apic_set_eoi(apic);
2362
break;
2363
2364
case APIC_LDR:
2365
if (!apic_x2apic_mode(apic))
2366
kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2367
else
2368
ret = 1;
2369
break;
2370
2371
case APIC_DFR:
2372
if (!apic_x2apic_mode(apic))
2373
kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2374
else
2375
ret = 1;
2376
break;
2377
2378
case APIC_SPIV: {
2379
u32 mask = 0x3ff;
2380
if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2381
mask |= APIC_SPIV_DIRECTED_EOI;
2382
apic_set_spiv(apic, val & mask);
2383
if (!(val & APIC_SPIV_APIC_ENABLED)) {
2384
int i;
2385
2386
for (i = 0; i < apic->nr_lvt_entries; i++) {
2387
kvm_lapic_set_reg(apic, APIC_LVTx(i),
2388
kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
2389
}
2390
apic_update_lvtt(apic);
2391
atomic_set(&apic->lapic_timer.pending, 0);
2392
2393
}
2394
break;
2395
}
2396
case APIC_ICR:
2397
WARN_ON_ONCE(apic_x2apic_mode(apic));
2398
2399
/* No delay here, so we always clear the pending bit */
2400
val &= ~APIC_ICR_BUSY;
2401
kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2402
kvm_lapic_set_reg(apic, APIC_ICR, val);
2403
break;
2404
case APIC_ICR2:
2405
if (apic_x2apic_mode(apic))
2406
ret = 1;
2407
else
2408
kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
2409
break;
2410
2411
case APIC_LVT0:
2412
apic_manage_nmi_watchdog(apic, val);
2413
fallthrough;
2414
case APIC_LVTTHMR:
2415
case APIC_LVTPC:
2416
case APIC_LVT1:
2417
case APIC_LVTERR:
2418
case APIC_LVTCMCI: {
2419
u32 index = get_lvt_index(reg);
2420
if (!kvm_lapic_lvt_supported(apic, index)) {
2421
ret = 1;
2422
break;
2423
}
2424
if (!kvm_apic_sw_enabled(apic))
2425
val |= APIC_LVT_MASKED;
2426
val &= apic_lvt_mask[index];
2427
kvm_lapic_set_reg(apic, reg, val);
2428
break;
2429
}
2430
2431
case APIC_LVTT:
2432
if (!kvm_apic_sw_enabled(apic))
2433
val |= APIC_LVT_MASKED;
2434
val &= (apic_lvt_mask[LVT_TIMER] | apic->lapic_timer.timer_mode_mask);
2435
kvm_lapic_set_reg(apic, APIC_LVTT, val);
2436
apic_update_lvtt(apic);
2437
break;
2438
2439
case APIC_TMICT:
2440
if (apic_lvtt_tscdeadline(apic))
2441
break;
2442
2443
cancel_apic_timer(apic);
2444
kvm_lapic_set_reg(apic, APIC_TMICT, val);
2445
start_apic_timer(apic);
2446
break;
2447
2448
case APIC_TDCR: {
2449
uint32_t old_divisor = apic->divide_count;
2450
2451
kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2452
update_divide_count(apic);
2453
if (apic->divide_count != old_divisor &&
2454
apic->lapic_timer.period) {
2455
hrtimer_cancel(&apic->lapic_timer.timer);
2456
update_target_expiration(apic, old_divisor);
2457
restart_apic_timer(apic);
2458
}
2459
break;
2460
}
2461
case APIC_ESR:
2462
if (apic_x2apic_mode(apic) && val != 0)
2463
ret = 1;
2464
break;
2465
2466
case APIC_SELF_IPI:
2467
/*
2468
* Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold
2469
* the vector, everything else is reserved.
2470
*/
2471
if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
2472
ret = 1;
2473
else
2474
kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
2475
break;
2476
default:
2477
ret = 1;
2478
break;
2479
}
2480
2481
/*
2482
* Recalculate APIC maps if necessary, e.g. if the software enable bit
2483
* was toggled, the APIC ID changed, etc... The maps are marked dirty
2484
* on relevant changes, i.e. this is a nop for most writes.
2485
*/
2486
kvm_recalculate_apic_map(apic->vcpu->kvm);
2487
2488
return ret;
2489
}
2490
2491
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2492
gpa_t address, int len, const void *data)
2493
{
2494
struct kvm_lapic *apic = to_lapic(this);
2495
unsigned int offset = address - apic->base_address;
2496
u32 val;
2497
2498
if (!apic_mmio_in_range(apic, address))
2499
return -EOPNOTSUPP;
2500
2501
if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2502
if (!kvm_check_has_quirk(vcpu->kvm,
2503
KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2504
return -EOPNOTSUPP;
2505
2506
return 0;
2507
}
2508
2509
/*
2510
* APIC registers must be aligned on a 128-bit boundary.
2511
* 32/64/128-bit registers must be accessed via 32-bit operations.
2512
* Refer to SDM 8.4.1.
2513
*/
2514
if (len != 4 || (offset & 0xf))
2515
return 0;
2516
2517
val = *(u32*)data;
2518
2519
kvm_lapic_reg_write(apic, offset & 0xff0, val);
2520
2521
return 0;
2522
}
2523
2524
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2525
{
2526
kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2527
}
2528
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_set_eoi);
2529
2530
#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
2531
2532
static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast)
2533
{
2534
if (data & X2APIC_ICR_RESERVED_BITS)
2535
return 1;
2536
2537
/*
2538
* The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but
2539
* only AMD requires it to be zero, Intel essentially just ignores the
2540
* bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled,
2541
* the CPU performs the reserved bits checks, i.e. the underlying CPU
2542
* behavior will "win". Arbitrarily clear the BUSY bit, as there is no
2543
* sane way to provide consistent behavior with respect to hardware.
2544
*/
2545
data &= ~APIC_ICR_BUSY;
2546
2547
if (fast) {
2548
struct kvm_lapic_irq irq;
2549
int ignored;
2550
2551
kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq);
2552
2553
if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq,
2554
&ignored, NULL))
2555
return -EWOULDBLOCK;
2556
2557
trace_kvm_apic_ipi((u32)data, irq.dest_id);
2558
} else {
2559
kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
2560
}
2561
if (kvm_x86_ops.x2apic_icr_is_split) {
2562
kvm_lapic_set_reg(apic, APIC_ICR, data);
2563
kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
2564
} else {
2565
kvm_lapic_set_reg64(apic, APIC_ICR, data);
2566
}
2567
trace_kvm_apic_write(APIC_ICR, data);
2568
return 0;
2569
}
2570
2571
static int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
2572
{
2573
return __kvm_x2apic_icr_write(apic, data, false);
2574
}
2575
2576
int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data)
2577
{
2578
return __kvm_x2apic_icr_write(apic, data, true);
2579
}
2580
2581
static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
2582
{
2583
if (kvm_x86_ops.x2apic_icr_is_split)
2584
return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
2585
(u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
2586
2587
return kvm_lapic_get_reg64(apic, APIC_ICR);
2588
}
2589
2590
/* emulate APIC access in a trap manner */
2591
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2592
{
2593
struct kvm_lapic *apic = vcpu->arch.apic;
2594
2595
/*
2596
* ICR is a single 64-bit register when x2APIC is enabled; all other
2597
* registers hold 32-bit values. For legacy xAPIC, ICR writes need to
2598
* go down the common path to get the upper half from ICR2.
2599
*
2600
* Note, using the write helpers may incur an unnecessary write to the
2601
* virtual APIC state, but KVM needs to conditionally modify the value
2602
* in certain cases, e.g. to clear the ICR busy bit. The cost of extra
2603
* conditional branches is likely a wash relative to the cost of the
2604
* maybe-unnecessary write, and both are in the noise anyway.
2605
*/
2606
if (apic_x2apic_mode(apic) && offset == APIC_ICR)
2607
WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
2608
else
2609
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
2610
}
2611
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_write_nodecode);
2612
2613
void kvm_free_lapic(struct kvm_vcpu *vcpu)
2614
{
2615
struct kvm_lapic *apic = vcpu->arch.apic;
2616
2617
if (!vcpu->arch.apic) {
2618
static_branch_dec(&kvm_has_noapic_vcpu);
2619
return;
2620
}
2621
2622
hrtimer_cancel(&apic->lapic_timer.timer);
2623
2624
if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2625
static_branch_slow_dec_deferred(&apic_hw_disabled);
2626
2627
if (!apic->sw_enabled)
2628
static_branch_slow_dec_deferred(&apic_sw_disabled);
2629
2630
if (apic->regs)
2631
free_page((unsigned long)apic->regs);
2632
2633
kfree(apic);
2634
}
2635
2636
/*
2637
*----------------------------------------------------------------------
2638
* LAPIC interface
2639
*----------------------------------------------------------------------
2640
*/
2641
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2642
{
2643
struct kvm_lapic *apic = vcpu->arch.apic;
2644
2645
if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2646
return 0;
2647
2648
return apic->lapic_timer.tscdeadline;
2649
}
2650
2651
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2652
{
2653
struct kvm_lapic *apic = vcpu->arch.apic;
2654
2655
if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2656
return;
2657
2658
hrtimer_cancel(&apic->lapic_timer.timer);
2659
apic->lapic_timer.tscdeadline = data;
2660
start_apic_timer(apic);
2661
}
2662
2663
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2664
{
2665
apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2666
}
2667
2668
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2669
{
2670
u64 tpr;
2671
2672
tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2673
2674
return (tpr & 0xf0) >> 4;
2675
}
2676
2677
static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value)
2678
{
2679
u64 old_value = vcpu->arch.apic_base;
2680
struct kvm_lapic *apic = vcpu->arch.apic;
2681
2682
vcpu->arch.apic_base = value;
2683
2684
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2685
vcpu->arch.cpuid_dynamic_bits_dirty = true;
2686
2687
if (!apic)
2688
return;
2689
2690
/* update jump label if enable bit changes */
2691
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2692
if (value & MSR_IA32_APICBASE_ENABLE) {
2693
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2694
static_branch_slow_dec_deferred(&apic_hw_disabled);
2695
/* Check if there are APF page ready requests pending */
2696
kvm_make_request(KVM_REQ_APF_READY, vcpu);
2697
} else {
2698
static_branch_inc(&apic_hw_disabled.key);
2699
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2700
}
2701
}
2702
2703
if ((old_value ^ value) & X2APIC_ENABLE) {
2704
if (value & X2APIC_ENABLE)
2705
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2706
else if (value & MSR_IA32_APICBASE_ENABLE)
2707
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2708
}
2709
2710
if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
2711
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
2712
kvm_x86_call(set_virtual_apic_mode)(vcpu);
2713
}
2714
2715
apic->base_address = apic->vcpu->arch.apic_base &
2716
MSR_IA32_APICBASE_BASE;
2717
2718
if ((value & MSR_IA32_APICBASE_ENABLE) &&
2719
apic->base_address != APIC_DEFAULT_PHYS_BASE) {
2720
kvm_set_apicv_inhibit(apic->vcpu->kvm,
2721
APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
2722
}
2723
}
2724
2725
int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
2726
{
2727
enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
2728
enum lapic_mode new_mode = kvm_apic_mode(value);
2729
2730
if (vcpu->arch.apic_base == value)
2731
return 0;
2732
2733
u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
2734
(guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
2735
2736
if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
2737
return 1;
2738
if (!host_initiated) {
2739
if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
2740
return 1;
2741
if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
2742
return 1;
2743
}
2744
2745
__kvm_apic_set_base(vcpu, value);
2746
kvm_recalculate_apic_map(vcpu->kvm);
2747
return 0;
2748
}
2749
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_base);
2750
2751
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2752
{
2753
struct kvm_lapic *apic = vcpu->arch.apic;
2754
2755
/*
2756
* When APICv is enabled, KVM must always search the IRR for a pending
2757
* IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
2758
* isn't running. If APICv is disabled, KVM _should_ search the IRR
2759
* for a pending IRQ. But KVM currently doesn't ensure *all* hardware,
2760
* e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
2761
* the IRR at this time could race with IRQ delivery from hardware that
2762
* still sees APICv as being enabled.
2763
*
2764
* FIXME: Ensure other vCPUs and devices observe the change in APICv
2765
* state prior to updating KVM's metadata caches, so that KVM
2766
* can safely search the IRR and set irr_pending accordingly.
2767
*/
2768
apic->irr_pending = true;
2769
2770
if (apic->apicv_active)
2771
apic->isr_count = 1;
2772
else
2773
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2774
2775
apic->highest_isr_cache = -1;
2776
}
2777
2778
int kvm_alloc_apic_access_page(struct kvm *kvm)
2779
{
2780
void __user *hva;
2781
2782
guard(mutex)(&kvm->slots_lock);
2783
2784
if (kvm->arch.apic_access_memslot_enabled ||
2785
kvm->arch.apic_access_memslot_inhibited)
2786
return 0;
2787
2788
hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
2789
APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
2790
if (IS_ERR(hva))
2791
return PTR_ERR(hva);
2792
2793
kvm->arch.apic_access_memslot_enabled = true;
2794
2795
return 0;
2796
}
2797
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_alloc_apic_access_page);
2798
2799
void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu)
2800
{
2801
struct kvm *kvm = vcpu->kvm;
2802
2803
if (!kvm->arch.apic_access_memslot_enabled)
2804
return;
2805
2806
kvm_vcpu_srcu_read_unlock(vcpu);
2807
2808
mutex_lock(&kvm->slots_lock);
2809
2810
if (kvm->arch.apic_access_memslot_enabled) {
2811
__x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
2812
/*
2813
* Clear "enabled" after the memslot is deleted so that a
2814
* different vCPU doesn't get a false negative when checking
2815
* the flag out of slots_lock. No additional memory barrier is
2816
* needed as modifying memslots requires waiting other vCPUs to
2817
* drop SRCU (see above), and false positives are ok as the
2818
* flag is rechecked after acquiring slots_lock.
2819
*/
2820
kvm->arch.apic_access_memslot_enabled = false;
2821
2822
/*
2823
* Mark the memslot as inhibited to prevent reallocating the
2824
* memslot during vCPU creation, e.g. if a vCPU is hotplugged.
2825
*/
2826
kvm->arch.apic_access_memslot_inhibited = true;
2827
}
2828
2829
mutex_unlock(&kvm->slots_lock);
2830
2831
kvm_vcpu_srcu_read_lock(vcpu);
2832
}
2833
2834
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2835
{
2836
struct kvm_lapic *apic = vcpu->arch.apic;
2837
u64 msr_val;
2838
int i;
2839
2840
kvm_x86_call(apicv_pre_state_restore)(vcpu);
2841
2842
if (!init_event) {
2843
msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2844
if (kvm_vcpu_is_reset_bsp(vcpu))
2845
msr_val |= MSR_IA32_APICBASE_BSP;
2846
2847
/*
2848
* Use the inner helper to avoid an extra recalculation of the
2849
* optimized APIC map if some other task has dirtied the map.
2850
* The recalculation needed for this vCPU will be done after
2851
* all APIC state has been initialized (see below).
2852
*/
2853
__kvm_apic_set_base(vcpu, msr_val);
2854
}
2855
2856
if (!apic)
2857
return;
2858
2859
/* Stop the timer in case it's a reset to an active apic */
2860
hrtimer_cancel(&apic->lapic_timer.timer);
2861
2862
/* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2863
if (!init_event)
2864
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2865
kvm_apic_set_version(apic->vcpu);
2866
2867
for (i = 0; i < apic->nr_lvt_entries; i++)
2868
kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
2869
apic_update_lvtt(apic);
2870
if (kvm_vcpu_is_reset_bsp(vcpu) &&
2871
kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2872
kvm_lapic_set_reg(apic, APIC_LVT0,
2873
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2874
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2875
2876
kvm_apic_set_dfr(apic, 0xffffffffU);
2877
apic_set_spiv(apic, 0xff);
2878
kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2879
if (!apic_x2apic_mode(apic))
2880
kvm_apic_set_ldr(apic, 0);
2881
kvm_lapic_set_reg(apic, APIC_ESR, 0);
2882
if (!apic_x2apic_mode(apic)) {
2883
kvm_lapic_set_reg(apic, APIC_ICR, 0);
2884
kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2885
} else {
2886
kvm_lapic_set_reg64(apic, APIC_ICR, 0);
2887
}
2888
kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2889
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2890
for (i = 0; i < 8; i++) {
2891
kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2892
kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2893
kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2894
}
2895
kvm_apic_update_apicv(vcpu);
2896
update_divide_count(apic);
2897
atomic_set(&apic->lapic_timer.pending, 0);
2898
2899
vcpu->arch.pv_eoi.msr_val = 0;
2900
apic_update_ppr(apic);
2901
if (apic->apicv_active) {
2902
kvm_x86_call(apicv_post_state_restore)(vcpu);
2903
kvm_x86_call(hwapic_isr_update)(vcpu, -1);
2904
}
2905
2906
vcpu->arch.apic_arb_prio = 0;
2907
vcpu->arch.apic_attention = 0;
2908
2909
kvm_recalculate_apic_map(vcpu->kvm);
2910
}
2911
2912
/*
2913
*----------------------------------------------------------------------
2914
* timer interface
2915
*----------------------------------------------------------------------
2916
*/
2917
2918
static bool lapic_is_periodic(struct kvm_lapic *apic)
2919
{
2920
return apic_lvtt_period(apic);
2921
}
2922
2923
int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2924
{
2925
struct kvm_lapic *apic = vcpu->arch.apic;
2926
2927
if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2928
return atomic_read(&apic->lapic_timer.pending);
2929
2930
return 0;
2931
}
2932
2933
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2934
{
2935
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2936
int vector, mode, trig_mode;
2937
int r;
2938
2939
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2940
vector = reg & APIC_VECTOR_MASK;
2941
mode = reg & APIC_MODE_MASK;
2942
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2943
2944
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
2945
if (r && lvt_type == APIC_LVTPC &&
2946
guest_cpuid_is_intel_compatible(apic->vcpu))
2947
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
2948
return r;
2949
}
2950
return 0;
2951
}
2952
2953
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2954
{
2955
struct kvm_lapic *apic = vcpu->arch.apic;
2956
2957
if (apic)
2958
kvm_apic_local_deliver(apic, APIC_LVT0);
2959
}
2960
2961
static const struct kvm_io_device_ops apic_mmio_ops = {
2962
.read = apic_mmio_read,
2963
.write = apic_mmio_write,
2964
};
2965
2966
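/*
* hrtimer callback for the emulated LAPIC timer: deliver (or pend) the
* expiration, and for periodic mode advance the target expiration and
* re-arm the hrtimer so it keeps firing.
*/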
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2967
{
2968
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2969
struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2970
2971
apic_timer_expired(apic, true);
2972
2973
if (lapic_is_periodic(apic)) {
2974
advance_periodic_target_expiration(apic);
2975
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2976
return HRTIMER_RESTART;
2977
} else
2978
return HRTIMER_NORESTART;
2979
}
2980
2981
int kvm_create_lapic(struct kvm_vcpu *vcpu)
2982
{
2983
struct kvm_lapic *apic;
2984
2985
ASSERT(vcpu != NULL);
2986
2987
if (!irqchip_in_kernel(vcpu->kvm)) {
2988
static_branch_inc(&kvm_has_noapic_vcpu);
2989
return 0;
2990
}
2991
2992
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2993
if (!apic)
2994
goto nomem;
2995
2996
vcpu->arch.apic = apic;
2997
2998
if (kvm_x86_ops.alloc_apic_backing_page)
2999
apic->regs = kvm_x86_call(alloc_apic_backing_page)(vcpu);
3000
else
3001
apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3002
if (!apic->regs) {
3003
printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
3004
vcpu->vcpu_id);
3005
goto nomem_free_apic;
3006
}
3007
apic->vcpu = vcpu;
3008
3009
apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
3010
3011
hrtimer_setup(&apic->lapic_timer.timer, apic_timer_fn, CLOCK_MONOTONIC,
3012
HRTIMER_MODE_ABS_HARD);
3013
if (lapic_timer_advance)
3014
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
3015
3016
/*
3017
* Stuff the APIC ENABLE bit in lieu of temporarily incrementing
3018
* apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
3019
*/
3020
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
3021
static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
3022
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
3023
3024
/*
3025
* Defer evaluating inhibits until the vCPU is first run, as this vCPU
3026
* will not get notified of any changes until this vCPU is visible to
3027
* other vCPUs (marked online and added to the set of vCPUs).
3028
*
3029
* Opportunistically mark APICv active, as VMX in particular is highly
3030
* unlikely to have inhibits. Ignore the current per-VM APICv state so
3031
* that vCPU creation is guaranteed to run with a deterministic value;
3032
* the request will ensure the vCPU gets the correct state before VM-Entry.
3033
*/
3034
if (enable_apicv) {
3035
apic->apicv_active = true;
3036
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
3037
}
3038
3039
return 0;
3040
nomem_free_apic:
3041
kfree(apic);
3042
vcpu->arch.apic = NULL;
3043
nomem:
3044
return -ENOMEM;
3045
}
3046
3047
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
3048
{
3049
struct kvm_lapic *apic = vcpu->arch.apic;
3050
u32 ppr;
3051
3052
if (!kvm_apic_present(vcpu))
3053
return -1;
3054
3055
if (apic->guest_apic_protected)
3056
return -1;
3057
3058
__apic_update_ppr(apic, &ppr);
3059
return apic_has_interrupt_for_ppr(apic, ppr);
3060
}
3061
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_has_interrupt);
3062
3063
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
3064
{
3065
u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
3066
3067
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
3068
return 1;
3069
if ((lvt0 & APIC_LVT_MASKED) == 0 &&
3070
GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
3071
return 1;
3072
return 0;
3073
}
3074
3075
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
3076
{
3077
struct kvm_lapic *apic = vcpu->arch.apic;
3078
3079
if (atomic_read(&apic->lapic_timer.pending) > 0) {
3080
kvm_apic_inject_pending_timer_irqs(apic);
3081
atomic_set(&apic->lapic_timer.pending, 0);
3082
}
3083
}
3084
3085
void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector)
3086
{
3087
struct kvm_lapic *apic = vcpu->arch.apic;
3088
u32 ppr;
3089
3090
if (WARN_ON_ONCE(vector < 0 || !apic))
3091
return;
3092
3093
/*
3094
* We get here even with APIC virtualization enabled, if doing
3095
* nested virtualization and L1 runs with the "acknowledge interrupt
3096
* on exit" mode. Then we cannot inject the interrupt via RVI,
3097
* because the process would deliver it through the IDT.
3098
*/
3099
3100
apic_clear_irr(vector, apic);
3101
if (kvm_hv_synic_auto_eoi_set(vcpu, vector)) {
3102
/*
3103
* For auto-EOI interrupts, there might be another pending
3104
* interrupt above PPR, so check whether to raise another
3105
* KVM_REQ_EVENT.
3106
*/
3107
apic_update_ppr(apic);
3108
} else {
3109
/*
3110
* For normal interrupts, PPR has been raised and there cannot
3111
* be a higher-priority pending interrupt---except if there was
3112
* a concurrent interrupt injection, but that would have
3113
* triggered KVM_REQ_EVENT already.
3114
*/
3115
apic_set_isr(vector, apic);
3116
__apic_update_ppr(apic, &ppr);
3117
}
3118
3119
}
3120
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_ack_interrupt);
3121
3122
static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
3123
struct kvm_lapic_state *s, bool set)
3124
{
3125
if (apic_x2apic_mode(vcpu->arch.apic)) {
3126
u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
3127
u32 *id = (u32 *)(s->regs + APIC_ID);
3128
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
3129
u64 icr;
3130
3131
if (vcpu->kvm->arch.x2apic_format) {
3132
if (*id != x2apic_id)
3133
return -EINVAL;
3134
} else {
3135
/*
3136
* Ignore the userspace value when setting APIC state.
3137
* KVM's model is that the x2APIC ID is readonly, e.g.
3138
* KVM only supports delivering interrupts to KVM's
3139
* version of the x2APIC ID. However, for backwards
3140
* compatibility, don't reject attempts to set a
3141
* mismatched ID for userspace that hasn't opted into
3142
* x2apic_format.
3143
*/
3144
if (set)
3145
*id = x2apic_id;
3146
else
3147
*id = x2apic_id << 24;
3148
}
3149
3150
/*
3151
* In x2APIC mode, the LDR is fixed and based on the id. And
3152
* if the ICR is _not_ split, ICR is internally a single 64-bit
3153
* register, but needs to be split to ICR+ICR2 in userspace for
3154
* backwards compatibility.
3155
*/
3156
if (set)
3157
*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
3158
3159
if (!kvm_x86_ops.x2apic_icr_is_split) {
3160
if (set) {
3161
icr = apic_get_reg(s->regs, APIC_ICR) |
3162
(u64)apic_get_reg(s->regs, APIC_ICR2) << 32;
3163
apic_set_reg64(s->regs, APIC_ICR, icr);
3164
} else {
3165
icr = apic_get_reg64(s->regs, APIC_ICR);
3166
apic_set_reg(s->regs, APIC_ICR2, icr >> 32);
3167
}
3168
}
3169
}
3170
3171
return 0;
3172
}
3173
3174
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
3175
{
3176
memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
3177
3178
/*
3179
* Get calculated timer current count for remaining timer period (if
3180
* any) and store it in the returned register set.
3181
*/
3182
apic_set_reg(s->regs, APIC_TMCCT, __apic_read(vcpu->arch.apic, APIC_TMCCT));
3183
3184
return kvm_apic_state_fixup(vcpu, s, false);
3185
}
3186
3187
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
3188
{
3189
struct kvm_lapic *apic = vcpu->arch.apic;
3190
int r;
3191
3192
kvm_x86_call(apicv_pre_state_restore)(vcpu);
3193
3194
/* set SPIV separately to get count of SW disabled APICs right */
3195
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
3196
3197
r = kvm_apic_state_fixup(vcpu, s, true);
3198
if (r) {
3199
kvm_recalculate_apic_map(vcpu->kvm);
3200
return r;
3201
}
3202
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
3203
3204
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
3205
kvm_recalculate_apic_map(vcpu->kvm);
3206
kvm_apic_set_version(vcpu);
3207
3208
apic_update_ppr(apic);
3209
cancel_apic_timer(apic);
3210
apic->lapic_timer.expired_tscdeadline = 0;
3211
apic_update_lvtt(apic);
3212
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
3213
update_divide_count(apic);
3214
__start_apic_timer(apic, APIC_TMCCT);
3215
kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
3216
kvm_apic_update_apicv(vcpu);
3217
if (apic->apicv_active) {
3218
kvm_x86_call(apicv_post_state_restore)(vcpu);
3219
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
3220
}
3221
kvm_make_request(KVM_REQ_EVENT, vcpu);
3222
3223
#ifdef CONFIG_KVM_IOAPIC
3224
if (ioapic_in_kernel(vcpu->kvm))
3225
kvm_rtc_eoi_tracking_restore_one(vcpu);
3226
#endif
3227
3228
vcpu->arch.apic_arb_prio = 0;
3229
3230
return 0;
3231
}
3232
3233
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
3234
{
3235
struct hrtimer *timer;
3236
3237
if (!lapic_in_kernel(vcpu) ||
3238
kvm_can_post_timer_interrupt(vcpu))
3239
return;
3240
3241
timer = &vcpu->arch.apic->lapic_timer.timer;
3242
if (hrtimer_cancel(timer))
3243
hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
3244
}
3245
3246
/*
3247
* apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
3248
*
3249
* Detect whether guest triggered PV EOI since the
3250
* last entry. If yes, set EOI on the guest's behalf.
3251
* Clear PV EOI in guest memory in any case.
3252
*/
3253
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
3254
struct kvm_lapic *apic)
3255
{
3256
int vector;
3257
/*
3258
* PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
3259
* and KVM_PV_EOI_ENABLED in guest memory as follows:
3260
*
3261
* KVM_APIC_PV_EOI_PENDING is unset:
3262
* -> host disabled PV EOI.
3263
* KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
3264
* -> host enabled PV EOI, guest did not execute EOI yet.
3265
* KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
3266
* -> host enabled PV EOI, guest executed EOI.
3267
*/
3268
BUG_ON(!pv_eoi_enabled(vcpu));
3269
3270
if (pv_eoi_test_and_clr_pending(vcpu))
3271
return;
3272
vector = apic_set_eoi(apic);
3273
trace_kvm_pv_eoi(apic, vector);
3274
}
3275
3276
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
3277
{
3278
u32 data;
3279
3280
if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
3281
apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
3282
3283
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
3284
return;
3285
3286
if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
3287
sizeof(u32)))
3288
return;
3289
3290
apic_set_tpr(vcpu->arch.apic, data & 0xff);
3291
}
3292
3293
/*
3294
* apic_sync_pv_eoi_to_guest - called before vmentry
3295
*
3296
* Detect whether it's safe to enable PV EOI and
3297
* if yes do so.
3298
*/
3299
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
3300
struct kvm_lapic *apic)
3301
{
3302
if (!pv_eoi_enabled(vcpu) ||
3303
/* IRR set or many bits in ISR: could be nested. */
3304
apic->irr_pending ||
3305
/* Cache not set: could be safe but we don't bother. */
3306
apic->highest_isr_cache == -1 ||
3307
/* Need EOI to update ioapic. */
3308
kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
3309
/*
3310
* PV EOI was disabled by apic_sync_pv_eoi_from_guest
3311
* so we need not do anything here.
3312
*/
3313
return;
3314
}
3315
3316
pv_eoi_set_pending(apic->vcpu);
3317
}
3318
3319
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
3320
{
3321
u32 data, tpr;
3322
int max_irr, max_isr;
3323
struct kvm_lapic *apic = vcpu->arch.apic;
3324
3325
apic_sync_pv_eoi_to_guest(vcpu, apic);
3326
3327
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
3328
return;
3329
3330
tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
3331
max_irr = apic_find_highest_irr(apic);
3332
if (max_irr < 0)
3333
max_irr = 0;
3334
max_isr = apic_find_highest_isr(apic);
3335
if (max_isr < 0)
3336
max_isr = 0;
3337
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
3338
3339
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
3340
sizeof(u32));
3341
}
3342
3343
int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
3344
{
3345
if (vapic_addr) {
3346
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
3347
&vcpu->arch.apic->vapic_cache,
3348
vapic_addr, sizeof(u32)))
3349
return -EINVAL;
3350
__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
3351
} else {
3352
__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
3353
}
3354
3355
vcpu->arch.apic->vapic_addr = vapic_addr;
3356
return 0;
3357
}
3358
3359
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
3360
{
3361
u32 low;
3362
3363
if (reg == APIC_ICR) {
3364
*data = kvm_x2apic_icr_read(apic);
3365
return 0;
3366
}
3367
3368
if (kvm_lapic_reg_read(apic, reg, 4, &low))
3369
return 1;
3370
3371
*data = low;
3372
3373
return 0;
3374
}
3375
3376
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
3377
{
3378
/*
3379
* ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
3380
* can be written as such, all other registers remain accessible only
3381
* through 32-bit reads/writes.
3382
*/
3383
if (reg == APIC_ICR)
3384
return kvm_x2apic_icr_write(apic, data);
3385
3386
/* Bits 63:32 are reserved in all other registers. */
3387
if (data >> 32)
3388
return 1;
3389
3390
return kvm_lapic_reg_write(apic, reg, (u32)data);
3391
}
3392
3393
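/*
* x2APIC MSRs map linearly onto the xAPIC MMIO layout: register offset =
* (msr - APIC_BASE_MSR) << 4, e.g. MSR 0x808 (TPR) maps to offset 0x80
* (APIC_TASKPRI).
*/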
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
3394
{
3395
struct kvm_lapic *apic = vcpu->arch.apic;
3396
u32 reg = (msr - APIC_BASE_MSR) << 4;
3397
3398
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
3399
return 1;
3400
3401
return kvm_lapic_msr_write(apic, reg, data);
3402
}
3403
3404
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
3405
{
3406
struct kvm_lapic *apic = vcpu->arch.apic;
3407
u32 reg = (msr - APIC_BASE_MSR) << 4;
3408
3409
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
3410
return 1;
3411
3412
return kvm_lapic_msr_read(apic, reg, data);
3413
}
3414
3415
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
3416
{
3417
if (!lapic_in_kernel(vcpu))
3418
return 1;
3419
3420
return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
3421
}
3422
3423
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
3424
{
3425
if (!lapic_in_kernel(vcpu))
3426
return 1;
3427
3428
return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
3429
}
3430
3431
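/*
* Enable or disable the PV EOI MSR. The data address must be 4-byte
* aligned; when enabling, (re)initialize the gfn_to_hva cache, preserving
* the existing (larger) length if the cache already covers the requested
* range.
*/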
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
3432
{
3433
u64 addr = data & ~KVM_MSR_ENABLED;
3434
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
3435
unsigned long new_len;
3436
int ret;
3437
3438
if (!IS_ALIGNED(addr, 4))
3439
return 1;
3440
3441
if (data & KVM_MSR_ENABLED) {
3442
if (addr == ghc->gpa && len <= ghc->len)
3443
new_len = ghc->len;
3444
else
3445
new_len = len;
3446
3447
ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
3448
if (ret)
3449
return ret;
3450
}
3451
3452
vcpu->arch.pv_eoi.msr_val = data;
3453
3454
return 0;
3455
}
3456
3457
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
3458
{
3459
struct kvm_lapic *apic = vcpu->arch.apic;
3460
u8 sipi_vector;
3461
int r;
3462
3463
if (!kvm_apic_has_pending_init_or_sipi(vcpu))
3464
return 0;
3465
3466
if (is_guest_mode(vcpu)) {
3467
r = kvm_check_nested_events(vcpu);
3468
if (r < 0)
3469
return r == -EBUSY ? 0 : r;
3470
/*
3471
* Continue processing INIT/SIPI even if a nested VM-Exit
3472
* occurred, e.g. pending SIPIs should be dropped if INIT+SIPI
3473
* are blocked as a result of transitioning to VMX root mode.
3474
*/
3475
}
3476
3477
/*
3478
* INITs are blocked while CPU is in specific states (SMM, VMX root
3479
* mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in
3480
* wait-for-SIPI (WFS).
3481
*/
3482
if (!kvm_apic_init_sipi_allowed(vcpu)) {
3483
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
3484
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3485
return 0;
3486
}
3487
3488
if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
3489
kvm_vcpu_reset(vcpu, true);
3490
if (kvm_vcpu_is_bsp(apic->vcpu))
3491
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
3492
else
3493
kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
3494
}
3495
if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
3496
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
3497
/* evaluate pending_events before reading the vector */
3498
smp_rmb();
3499
sipi_vector = apic->sipi_vector;
3500
kvm_x86_call(vcpu_deliver_sipi_vector)(vcpu,
3501
sipi_vector);
3502
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
3503
}
3504
}
3505
return 0;
3506
}
3507
3508
void kvm_lapic_exit(void)
3509
{
3510
static_key_deferred_flush(&apic_hw_disabled);
3511
WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
3512
static_key_deferred_flush(&apic_sw_disabled);
3513
WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
3514
}
3515
3516