GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/smm.c
/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

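/*
 * Build-time layout checks: every field of the KVM-internal SMRAM structs
 * must sit at its architectural offset within the state-save area, which
 * KVM accesses at smbase + 0xfe00 (hence the 0xFE00 bias in the macros).
 */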
static void check_smram_offsets(void)
{
        /* 32 bit SMRAM image */
        CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
        CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
        CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
        CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
        CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
        CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
        CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
        CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
        CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
        CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
        CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
        CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
        CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
        CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
        CHECK_SMRAM32_OFFSET(fs, 0xFF38);
        CHECK_SMRAM32_OFFSET(gs, 0xFF44);
        CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
        CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
        CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
        CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
        CHECK_SMRAM32_OFFSET(es, 0xFF84);
        CHECK_SMRAM32_OFFSET(cs, 0xFF90);
        CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
        CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
        CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
        CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
        CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
        CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
        CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
        CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
        CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
        CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
        CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
        CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
        CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
        CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
        CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
        CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);

        /* 64 bit SMRAM image */
        CHECK_SMRAM64_OFFSET(es, 0xFE00);
        CHECK_SMRAM64_OFFSET(cs, 0xFE10);
        CHECK_SMRAM64_OFFSET(ss, 0xFE20);
        CHECK_SMRAM64_OFFSET(ds, 0xFE30);
        CHECK_SMRAM64_OFFSET(fs, 0xFE40);
        CHECK_SMRAM64_OFFSET(gs, 0xFE50);
        CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
        CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
        CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
        CHECK_SMRAM64_OFFSET(tr, 0xFE90);
        CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
        CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
        CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
        CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
        CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
        CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
        CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
        CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
        CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
        CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
        CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
        CHECK_SMRAM64_OFFSET(efer, 0xFED0);
        CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
        CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
        CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
        CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
        CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
        CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
        CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
        CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
        CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
        CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
        CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
        CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
        CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
        CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
        CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
        CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
        CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
        CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
        CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
        CHECK_SMRAM64_OFFSET(rip, 0xFF78);
        CHECK_SMRAM64_OFFSET(gprs, 0xFF80);

        BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET


void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
        trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

        if (entering_smm) {
                vcpu->arch.hflags |= HF_SMM_MASK;
        } else {
                vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

                /* Process a latched INIT or SMI, if any. */
                kvm_make_request(KVM_REQ_EVENT, vcpu);

                /*
                 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
                 * on SMM exit we still need to reload them from
                 * guest memory
                 */
                vcpu->arch.pdptrs_from_userspace = false;
        }

        kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_smm_changed);

void process_smi(struct kvm_vcpu *vcpu)
{
        vcpu->arch.smi_pending = true;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
}

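/*
 * Pack the kvm_segment attribute bits into the "flags" format used by the
 * 32-bit SMRAM segment state; rsm_set_desc_flags() performs the inverse.
 */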
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
        u32 flags = 0;
        flags |= seg->g << 23;
        flags |= seg->db << 22;
        flags |= seg->l << 21;
        flags |= seg->avl << 20;
        flags |= seg->present << 15;
        flags |= seg->dpl << 13;
        flags |= seg->s << 12;
        flags |= seg->type << 8;
        return flags;
}

static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_32 *state,
                                  u32 *selector, int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        *selector = seg.selector;
        state->base = seg.base;
        state->limit = seg.limit;
        state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_64 *state,
                                  int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        state->selector = seg.selector;
        state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
        state->limit = seg.limit;
        state->base = seg.base;
}
#endif

static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_32 *smram)
{
        struct desc_ptr dt;
        int i;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->eflags = kvm_get_rflags(vcpu);
        smram->eip = kvm_rip_read(vcpu);

        for (i = 0; i < 8; i++)
                smram->gprs[i] = kvm_register_read_raw(vcpu, i);

        smram->dr6 = (u32)vcpu->arch.dr6;
        smram->dr7 = (u32)vcpu->arch.dr7;

        enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
        enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.base = dt.address;
        smram->gdtr.limit = dt.size;

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.base = dt.address;
        smram->idtr.limit = dt.size;

        enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
        enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
        enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

        enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
        enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
        enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

        smram->cr4 = kvm_read_cr4(vcpu);
        smram->smm_revision = 0x00020000;
        smram->smbase = vcpu->arch.smbase;

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_64 *smram)
{
        struct desc_ptr dt;
        int i;

        for (i = 0; i < 16; i++)
                smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

        smram->rip = kvm_rip_read(vcpu);
        smram->rflags = kvm_get_rflags(vcpu);

        smram->dr6 = vcpu->arch.dr6;
        smram->dr7 = vcpu->arch.dr7;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->cr4 = kvm_read_cr4(vcpu);

        smram->smbase = vcpu->arch.smbase;
        smram->smm_revison = 0x00020064;

        smram->efer = vcpu->arch.efer;

        enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.limit = dt.size;
        smram->idtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.limit = dt.size;
        smram->gdtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
        enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
        enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
        enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
        enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
        enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);

        if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
            kvm_msr_read(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, &smram->ssp))
                kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
#endif

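/*
 * Emulate SMI delivery: save the current register state into the SMRAM
 * state-save area at smbase + 0xfe00, then switch the vCPU into the SMM
 * execution environment (real-mode-like segments with CS base = smbase,
 * RIP = 0x8000).
 */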
void enter_smm(struct kvm_vcpu *vcpu)
{
        struct kvm_segment cs, ds;
        struct desc_ptr dt;
        unsigned long cr0;
        union kvm_smram smram;

        check_smram_offsets();

        memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
                enter_smm_save_state_64(vcpu, &smram.smram64);
        else
#endif
                enter_smm_save_state_32(vcpu, &smram.smram32);

        /*
         * Give enter_smm() a chance to make ISA-specific changes to the vCPU
         * state (e.g. leave guest mode) after we've saved the state into the
         * SMM state-save area.
         *
         * Kill the VM in the unlikely case of failure, because the VM
         * can be in undefined state in this case.
         */
        if (kvm_x86_call(enter_smm)(vcpu, &smram))
                goto error;

        kvm_smm_changed(vcpu, true);

        if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
                goto error;

        if (kvm_x86_call(get_nmi_mask)(vcpu))
                vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
        else
                kvm_x86_call(set_nmi_mask)(vcpu, true);

        kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
        kvm_rip_write(vcpu, 0x8000);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

        cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
        kvm_x86_call(set_cr0)(vcpu, cr0);

        kvm_x86_call(set_cr4)(vcpu, 0);

        /* Undocumented: IDT limit is set to zero on entry to SMM. */
        dt.address = dt.size = 0;
        kvm_x86_call(set_idt)(vcpu, &dt);

        if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
                goto error;

        cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
        cs.base = vcpu->arch.smbase;

        ds.selector = 0;
        ds.base = 0;

        cs.limit = ds.limit = 0xffffffff;
        cs.type = ds.type = 0x3;
        cs.dpl = ds.dpl = 0;
        cs.db = ds.db = 0;
        cs.s = ds.s = 1;
        cs.l = ds.l = 0;
        cs.g = ds.g = 1;
        cs.avl = ds.avl = 0;
        cs.present = ds.present = 1;
        cs.unusable = ds.unusable = 0;
        cs.padding = ds.padding = 0;

        kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
                if (kvm_x86_call(set_efer)(vcpu, 0))
                        goto error;
#endif

        vcpu->arch.cpuid_dynamic_bits_dirty = true;
        kvm_mmu_reset_context(vcpu);
        return;
error:
        kvm_vm_dead(vcpu->kvm);
}

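/*
 * Unpack the SMRAM segment "flags" produced by enter_smm_get_segment_flags()
 * back into kvm_segment attribute bits.
 */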
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
        desc->g = (flags >> 23) & 1;
        desc->db = (flags >> 22) & 1;
        desc->l = (flags >> 21) & 1;
        desc->avl = (flags >> 20) & 1;
        desc->present = (flags >> 15) & 1;
        desc->dpl = (flags >> 13) & 3;
        desc->s = (flags >> 12) & 1;
        desc->type = (flags >> 8) & 15;

        desc->unusable = !desc->present;
        desc->padding = 0;
}

static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_32 *state,
                           u16 selector, int n)
{
        struct kvm_segment desc;

        desc.selector = selector;
        desc.base = state->base;
        desc.limit = state->limit;
        rsm_set_desc_flags(&desc, state->flags);
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_64 *state,
                           int n)
{
        struct kvm_segment desc;

        desc.selector = state->selector;
        rsm_set_desc_flags(&desc, state->attributes << 8);
        desc.limit = state->limit;
        desc.base = state->base;
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}
#endif

static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
                                    u64 cr0, u64 cr3, u64 cr4)
{
        int bad;
        u64 pcid;

        /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
        pcid = 0;
        if (cr4 & X86_CR4_PCIDE) {
                pcid = cr3 & 0xfff;
                cr3 &= ~0xfff;
        }

        bad = kvm_set_cr3(vcpu, cr3);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode. However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = kvm_set_cr0(vcpu, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = kvm_set_cr4(vcpu, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
                if (pcid) {
                        bad = kvm_set_cr3(vcpu, cr3 | pcid);
                        if (bad)
                                return X86EMUL_UNHANDLEABLE;
                }

        }

        return X86EMUL_CONTINUE;
}

static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_32 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
        ctxt->_eip = smstate->eip;

        for (i = 0; i < 8; i++)
                *reg_write(ctxt, i) = smstate->gprs[i];

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
        rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

        dt.address = smstate->gdtr.base;
        dt.size = smstate->gdtr.limit;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        dt.address = smstate->idtr.base;
        dt.size = smstate->idtr.limit;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
        rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
        rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

        rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
        rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
        rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

        vcpu->arch.smbase = smstate->smbase;

        r = rsm_enter_protected_mode(vcpu, smstate->cr0,
                                     smstate->cr3, smstate->cr4);

        if (r != X86EMUL_CONTINUE)
                return r;

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return r;
}

#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_64 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        for (i = 0; i < 16; i++)
                *reg_write(ctxt, i) = smstate->gprs[15 - i];

        ctxt->_eip = smstate->rip;
        ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        vcpu->arch.smbase = smstate->smbase;

        if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

        dt.size = smstate->idtr.limit;
        dt.address = smstate->idtr.base;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

        dt.size = smstate->gdtr.limit;
        dt.address = smstate->gdtr.base;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
        rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
        rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
        rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
        rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
        rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
            kvm_msr_write(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, smstate->ssp))
                return X86EMUL_UNHANDLEABLE;

        return X86EMUL_CONTINUE;
}
#endif

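/*
 * Emulate RSM: read the state-save area back from SMRAM, drop to a state in
 * which CR0/CR3/CR4/EFER can be restored safely, and then reload the saved
 * register state (32-bit or 64-bit layout, depending on long mode support).
 */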
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        unsigned long cr0;
        union kvm_smram smram;
        u64 smbase;
        int ret;

        smbase = vcpu->arch.smbase;

        ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
        if (ret < 0)
                return X86EMUL_UNHANDLEABLE;

        if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
                kvm_x86_call(set_nmi_mask)(vcpu, false);

        kvm_smm_changed(vcpu, false);

        /*
         * Get back to real mode, to prepare a safe state in which to load
         * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
         * supports long mode.
         */
#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
                struct kvm_segment cs_desc;
                unsigned long cr4;

                /* Zero CR4.PCIDE before CR0.PG. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PCIDE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

                /* A 32-bit code segment is required to clear EFER.LMA. */
                memset(&cs_desc, 0, sizeof(cs_desc));
                cs_desc.type = 0xb;
                cs_desc.s = cs_desc.g = cs_desc.present = 1;
                kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
        }
#endif

        /* For the 64-bit case, this will clear EFER.LMA. */
        cr0 = kvm_read_cr0(vcpu);
        if (cr0 & X86_CR0_PE)
                kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
                unsigned long cr4, efer;

                /* Clear CR4.PAE before clearing EFER.LME. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PAE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

                /* And finally go back to 32-bit mode. */
                efer = 0;
                __kvm_emulate_msr_write(vcpu, MSR_EFER, efer);
        }
#endif

        /*
         * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
         * mode should happen _after_ loading state from SMRAM. However, KVM
         * piggybacks the nested VM-Enter flows (which is wrong for many other
         * reasons), and so nSVM/nVMX would clobber state that is loaded from
         * SMRAM and from the VMCS/VMCB.
         */
        if (kvm_x86_call(leave_smm)(vcpu, &smram))
                return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
                ret = rsm_load_state_64(ctxt, &smram.smram64);
        else
#endif
                ret = rsm_load_state_32(ctxt, &smram.smram32);

        /*
         * If RSM fails and triggers shutdown, architecturally the shutdown
         * occurs *before* the transition to guest mode. But due to KVM's
         * flawed handling of RSM to L2 (see above), the vCPU may already be
         * in_guest_mode(). Force the vCPU out of guest mode before delivering
         * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
         * that architecturally shouldn't be possible.
         */
        if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
                kvm_leave_nested(vcpu);
        return ret;
}