GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/emulate.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/******************************************************************************
3
* emulate.c
4
*
5
* Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6
*
7
* Copyright (c) 2005 Keir Fraser
8
*
9
* Linux coding style, mod r/m decoder, segment base fixes, real-mode
10
* privileged instructions:
11
*
12
* Copyright (C) 2006 Qumranet
13
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
14
*
15
* Avi Kivity <[email protected]>
16
* Yaniv Kamay <[email protected]>
17
*
18
* From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
19
*/
20
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22
#include <linux/kvm_host.h>
23
#include "kvm_cache_regs.h"
24
#include "kvm_emulate.h"
25
#include <linux/stringify.h>
26
#include <asm/debugreg.h>
27
#include <asm/nospec-branch.h>
28
#include <asm/ibt.h>
29
30
#include "x86.h"
31
#include "tss.h"
32
#include "mmu.h"
33
#include "pmu.h"
34
35
/*
36
* Operand types
37
*/
38
#define OpNone 0ull
39
#define OpImplicit 1ull /* No generic decode */
40
#define OpReg 2ull /* Register */
41
#define OpMem 3ull /* Memory */
42
#define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
43
#define OpDI 5ull /* ES:DI/EDI/RDI */
44
#define OpMem64 6ull /* Memory, 64-bit */
45
#define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
46
#define OpDX 8ull /* DX register */
47
#define OpCL 9ull /* CL register (for shifts) */
48
#define OpImmByte 10ull /* 8-bit sign extended immediate */
49
#define OpOne 11ull /* Implied 1 */
50
#define OpImm 12ull /* Sign extended up to 32-bit immediate */
51
#define OpMem16 13ull /* Memory operand (16-bit). */
52
#define OpMem32 14ull /* Memory operand (32-bit). */
53
#define OpImmU 15ull /* Immediate operand, zero extended */
54
#define OpSI 16ull /* SI/ESI/RSI */
55
#define OpImmFAddr 17ull /* Immediate far address */
56
#define OpMemFAddr 18ull /* Far address in memory */
57
#define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
58
#define OpES 20ull /* ES */
59
#define OpCS 21ull /* CS */
60
#define OpSS 22ull /* SS */
61
#define OpDS 23ull /* DS */
62
#define OpFS 24ull /* FS */
63
#define OpGS 25ull /* GS */
64
#define OpMem8 26ull /* 8-bit zero extended memory operand */
65
#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
66
#define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
67
#define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
68
#define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
69
70
#define OpBits 5 /* Width of operand field */
71
#define OpMask ((1ull << OpBits) - 1)
72
73
/*
74
* Opcode effective-address decode tables.
75
* Note that we only emulate instructions that have at least one memory
76
* operand (excluding implicit stack references). We assume that stack
77
* references and instruction fetches will never occur in special memory
78
* areas that require emulation. So, for example, 'mov <imm>,<reg>' need
79
* not be handled.
80
*/
81
82
/* Operand sizes: 8-bit operands or specified/overridden size. */
83
#define ByteOp (1<<0) /* 8-bit operands. */
84
/* Destination operand type. */
85
#define DstShift 1
86
#define ImplicitOps (OpImplicit << DstShift)
87
#define DstReg (OpReg << DstShift)
88
#define DstMem (OpMem << DstShift)
89
#define DstAcc (OpAcc << DstShift)
90
#define DstDI (OpDI << DstShift)
91
#define DstMem64 (OpMem64 << DstShift)
92
#define DstMem16 (OpMem16 << DstShift)
93
#define DstImmUByte (OpImmUByte << DstShift)
94
#define DstDX (OpDX << DstShift)
95
#define DstAccLo (OpAccLo << DstShift)
96
#define DstMask (OpMask << DstShift)
97
/* Source operand type. */
98
#define SrcShift 6
99
#define SrcNone (OpNone << SrcShift)
100
#define SrcReg (OpReg << SrcShift)
101
#define SrcMem (OpMem << SrcShift)
102
#define SrcMem16 (OpMem16 << SrcShift)
103
#define SrcMem32 (OpMem32 << SrcShift)
104
#define SrcImm (OpImm << SrcShift)
105
#define SrcImmByte (OpImmByte << SrcShift)
106
#define SrcOne (OpOne << SrcShift)
107
#define SrcImmUByte (OpImmUByte << SrcShift)
108
#define SrcImmU (OpImmU << SrcShift)
109
#define SrcSI (OpSI << SrcShift)
110
#define SrcXLat (OpXLat << SrcShift)
111
#define SrcImmFAddr (OpImmFAddr << SrcShift)
112
#define SrcMemFAddr (OpMemFAddr << SrcShift)
113
#define SrcAcc (OpAcc << SrcShift)
114
#define SrcImmU16 (OpImmU16 << SrcShift)
115
#define SrcImm64 (OpImm64 << SrcShift)
116
#define SrcDX (OpDX << SrcShift)
117
#define SrcMem8 (OpMem8 << SrcShift)
118
#define SrcAccHi (OpAccHi << SrcShift)
119
#define SrcMask (OpMask << SrcShift)
120
#define BitOp (1<<11)
121
#define MemAbs (1<<12) /* Memory operand is absolute displacement */
122
#define String (1<<13) /* String instruction (rep capable) */
123
#define Stack (1<<14) /* Stack instruction (push/pop) */
124
#define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
125
#define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
126
#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
127
#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
128
#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
129
#define Escape (5<<15) /* Escape to coprocessor instruction */
130
#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
131
#define ModeDual (7<<15) /* Different instruction for 32/64 bit */
132
#define Sse (1<<18) /* SSE Vector instruction */
133
/* Generic ModRM decode. */
134
#define ModRM (1<<19)
135
/* Destination is only written; never read. */
136
#define Mov (1<<20)
137
/* Misc flags */
138
#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
139
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
140
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
141
#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
142
#define Undefined (1<<25) /* No Such Instruction */
143
#define Lock (1<<26) /* lock prefix is allowed for the instruction */
144
#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
145
#define No64 (1<<28)
146
#define PageTable (1 << 29) /* instruction used to write page table */
147
#define NotImpl (1 << 30) /* instruction is not implemented */
148
/* Source 2 operand type */
149
#define Src2Shift (31)
150
#define Src2None (OpNone << Src2Shift)
151
#define Src2Mem (OpMem << Src2Shift)
152
#define Src2CL (OpCL << Src2Shift)
153
#define Src2ImmByte (OpImmByte << Src2Shift)
154
#define Src2One (OpOne << Src2Shift)
155
#define Src2Imm (OpImm << Src2Shift)
156
#define Src2ES (OpES << Src2Shift)
157
#define Src2CS (OpCS << Src2Shift)
158
#define Src2SS (OpSS << Src2Shift)
159
#define Src2DS (OpDS << Src2Shift)
160
#define Src2FS (OpFS << Src2Shift)
161
#define Src2GS (OpGS << Src2Shift)
162
#define Src2Mask (OpMask << Src2Shift)
163
#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
164
#define AlignMask ((u64)7 << 41)
165
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
166
#define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
167
#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
168
#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
169
#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
170
#define NoWrite ((u64)1 << 45) /* No writeback */
171
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
172
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
173
#define Intercept ((u64)1 << 48) /* Has valid intercept field */
174
#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
175
#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
176
#define NearBranch ((u64)1 << 52) /* Near branches */
177
#define No16 ((u64)1 << 53) /* No 16 bit operand */
178
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
179
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
180
#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
181
#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */
182
183
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
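
/*
 * Illustration (editor's sketch, not part of the upstream source): the
 * operand kinds defined above are packed into a single per-opcode flag
 * word through the Dst/Src/Src2 shifts, e.g.
 *
 *     DstMem | SrcReg | ModRM
 *   = (OpMem << DstShift) | (OpReg << SrcShift) | ModRM
 *   = (3 << 1) | (2 << 6) | (1 << 19) = 0x80086
 *
 * and the decode path later in this file recovers each field with
 * expressions of the form (ctxt->d >> DstShift) & OpMask.
 */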
184
185
#define X2(x...) x, x
186
#define X3(x...) X2(x), x
187
#define X4(x...) X2(x), X2(x)
188
#define X5(x...) X4(x), x
189
#define X6(x...) X4(x), X2(x)
190
#define X7(x...) X4(x), X3(x)
191
#define X8(x...) X4(x), X4(x)
192
#define X16(x...) X8(x), X8(x)
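
/*
 * Illustration (editor's sketch, not part of the upstream source): the
 * X* helpers only replicate a table entry so runs of identical entries
 * stay compact in the opcode tables below, e.g.
 *
 *     X4(E)  expands to  E, E, E, E
 *     X16(E) expands to  sixteen copies of E
 *
 * which is how blocks such as the eight "push reg" encodings or ranges
 * of undefined opcodes are written.
 */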
193
194
struct opcode {
195
u64 flags;
196
u8 intercept;
197
u8 pad[7];
198
union {
199
int (*execute)(struct x86_emulate_ctxt *ctxt);
200
const struct opcode *group;
201
const struct group_dual *gdual;
202
const struct gprefix *gprefix;
203
const struct escape *esc;
204
const struct instr_dual *idual;
205
const struct mode_dual *mdual;
206
void (*fastop)(struct fastop *fake);
207
} u;
208
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
209
};
210
211
struct group_dual {
212
struct opcode mod012[8];
213
struct opcode mod3[8];
214
};
215
216
struct gprefix {
217
struct opcode pfx_no;
218
struct opcode pfx_66;
219
struct opcode pfx_f2;
220
struct opcode pfx_f3;
221
};
222
223
struct escape {
224
struct opcode op[8];
225
struct opcode high[64];
226
};
227
228
struct instr_dual {
229
struct opcode mod012;
230
struct opcode mod3;
231
};
232
233
struct mode_dual {
234
struct opcode mode32;
235
struct opcode mode64;
236
};
237
238
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
239
240
enum x86_transfer_type {
241
X86_TRANSFER_NONE,
242
X86_TRANSFER_CALL_JMP,
243
X86_TRANSFER_RET,
244
X86_TRANSFER_TASK_SWITCH,
245
};
246
247
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
248
{
249
unsigned long dirty = ctxt->regs_dirty;
250
unsigned reg;
251
252
for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
253
ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
254
}
255
256
static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
257
{
258
ctxt->regs_dirty = 0;
259
ctxt->regs_valid = 0;
260
}
261
262
/*
263
* These EFLAGS bits are restored from saved value during emulation, and
264
* any changes are written back to the saved value after emulation.
265
*/
266
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
267
X86_EFLAGS_PF|X86_EFLAGS_CF)
268
269
#ifdef CONFIG_X86_64
270
#define ON64(x) x
271
#else
272
#define ON64(x)
273
#endif
274
275
/*
276
* fastop functions have a special calling convention:
277
*
278
* dst: rax (in/out)
279
* src: rdx (in/out)
280
* src2: rcx (in)
281
* flags: rflags (in/out)
282
* ex: rsi (in:fastop pointer, out:zero if exception)
283
*
284
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
285
* different operand sizes can be reached by calculation, rather than a jump
286
* table (which would be bigger than the code).
287
*
288
* The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
289
* and 1 for the straight line speculation INT3, leaves 7 bytes for the
290
* body of the function. Currently none is larger than 4.
291
*/
292
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
293
294
#define FASTOP_SIZE 16
295
296
#define __FOP_FUNC(name) \
297
".align " __stringify(FASTOP_SIZE) " \n\t" \
298
".type " name ", @function \n\t" \
299
name ":\n\t" \
300
ASM_ENDBR \
301
IBT_NOSEAL(name)
302
303
#define FOP_FUNC(name) \
304
__FOP_FUNC(#name)
305
306
#define __FOP_RET(name) \
307
"11: " ASM_RET \
308
".size " name ", .-" name "\n\t"
309
310
#define FOP_RET(name) \
311
__FOP_RET(#name)
312
313
#define __FOP_START(op, align) \
314
extern void em_##op(struct fastop *fake); \
315
asm(".pushsection .text, \"ax\" \n\t" \
316
".global em_" #op " \n\t" \
317
".align " __stringify(align) " \n\t" \
318
"em_" #op ":\n\t"
319
320
#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
321
322
#define FOP_END \
323
".popsection")
324
325
#define __FOPNOP(name) \
326
__FOP_FUNC(name) \
327
__FOP_RET(name)
328
329
#define FOPNOP() \
330
__FOPNOP(__stringify(__UNIQUE_ID(nop)))
331
332
#define FOP1E(op, dst) \
333
__FOP_FUNC(#op "_" #dst) \
334
"10: " #op " %" #dst " \n\t" \
335
__FOP_RET(#op "_" #dst)
336
337
#define FOP1EEX(op, dst) \
338
FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
339
340
#define FASTOP1(op) \
341
FOP_START(op) \
342
FOP1E(op##b, al) \
343
FOP1E(op##w, ax) \
344
FOP1E(op##l, eax) \
345
ON64(FOP1E(op##q, rax)) \
346
FOP_END
347
348
/* 1-operand, using src2 (for MUL/DIV r/m) */
349
#define FASTOP1SRC2(op, name) \
350
FOP_START(name) \
351
FOP1E(op, cl) \
352
FOP1E(op, cx) \
353
FOP1E(op, ecx) \
354
ON64(FOP1E(op, rcx)) \
355
FOP_END
356
357
/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
358
#define FASTOP1SRC2EX(op, name) \
359
FOP_START(name) \
360
FOP1EEX(op, cl) \
361
FOP1EEX(op, cx) \
362
FOP1EEX(op, ecx) \
363
ON64(FOP1EEX(op, rcx)) \
364
FOP_END
365
366
#define FOP2E(op, dst, src) \
367
__FOP_FUNC(#op "_" #dst "_" #src) \
368
#op " %" #src ", %" #dst " \n\t" \
369
__FOP_RET(#op "_" #dst "_" #src)
370
371
#define FASTOP2(op) \
372
FOP_START(op) \
373
FOP2E(op##b, al, dl) \
374
FOP2E(op##w, ax, dx) \
375
FOP2E(op##l, eax, edx) \
376
ON64(FOP2E(op##q, rax, rdx)) \
377
FOP_END
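
/*
 * Illustration (editor's sketch, not part of the upstream source):
 * FASTOP2(add) expands to one tiny assembly stub per operand size, each
 * aligned/padded to FASTOP_SIZE bytes and laid out back to back:
 *
 *     em_add      : addb %dl,  %al   ; ret     (8-bit)
 *     em_add + 16 : addw %dx,  %ax   ; ret     (16-bit)
 *     em_add + 32 : addl %edx, %eax  ; ret     (32-bit)
 *     em_add + 48 : addq %rdx, %rax  ; ret     (64-bit, CONFIG_X86_64)
 *
 * so the fastop() dispatcher (declared above, defined later in this
 * file) can select the right width by adding a multiple of FASTOP_SIZE
 * to the base address instead of going through a jump table.
 */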
378
379
/* 2 operand, word only */
380
#define FASTOP2W(op) \
381
FOP_START(op) \
382
FOPNOP() \
383
FOP2E(op##w, ax, dx) \
384
FOP2E(op##l, eax, edx) \
385
ON64(FOP2E(op##q, rax, rdx)) \
386
FOP_END
387
388
/* 2 operand, src is CL */
389
#define FASTOP2CL(op) \
390
FOP_START(op) \
391
FOP2E(op##b, al, cl) \
392
FOP2E(op##w, ax, cl) \
393
FOP2E(op##l, eax, cl) \
394
ON64(FOP2E(op##q, rax, cl)) \
395
FOP_END
396
397
/* 2 operand, src and dest are reversed */
398
#define FASTOP2R(op, name) \
399
FOP_START(name) \
400
FOP2E(op##b, dl, al) \
401
FOP2E(op##w, dx, ax) \
402
FOP2E(op##l, edx, eax) \
403
ON64(FOP2E(op##q, rdx, rax)) \
404
FOP_END
405
406
#define FOP3E(op, dst, src, src2) \
407
__FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
408
#op " %" #src2 ", %" #src ", %" #dst " \n\t"\
409
__FOP_RET(#op "_" #dst "_" #src "_" #src2)
410
411
/* 3-operand, word-only, src2=cl */
412
#define FASTOP3WCL(op) \
413
FOP_START(op) \
414
FOPNOP() \
415
FOP3E(op##w, ax, dx, cl) \
416
FOP3E(op##l, eax, edx, cl) \
417
ON64(FOP3E(op##q, rax, rdx, cl)) \
418
FOP_END
419
420
/* Special case for SETcc - 1 instruction per cc */
421
#define FOP_SETCC(op) \
422
FOP_FUNC(op) \
423
#op " %al \n\t" \
424
FOP_RET(op)
425
426
FOP_START(setcc)
427
FOP_SETCC(seto)
428
FOP_SETCC(setno)
429
FOP_SETCC(setc)
430
FOP_SETCC(setnc)
431
FOP_SETCC(setz)
432
FOP_SETCC(setnz)
433
FOP_SETCC(setbe)
434
FOP_SETCC(setnbe)
435
FOP_SETCC(sets)
436
FOP_SETCC(setns)
437
FOP_SETCC(setp)
438
FOP_SETCC(setnp)
439
FOP_SETCC(setl)
440
FOP_SETCC(setnl)
441
FOP_SETCC(setle)
442
FOP_SETCC(setnle)
443
FOP_END;
444
445
FOP_START(salc)
446
FOP_FUNC(salc)
447
"pushf; sbb %al, %al; popf \n\t"
448
FOP_RET(salc)
449
FOP_END;
450
451
/*
452
* XXX: inoutclob user must know where the argument is being expanded.
453
* Using asm goto would allow us to remove _fault.
454
*/
455
#define asm_safe(insn, inoutclob...) \
456
({ \
457
int _fault = 0; \
458
\
459
asm volatile("1:" insn "\n" \
460
"2:\n" \
461
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
462
: [_fault] "+r"(_fault) inoutclob ); \
463
\
464
_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
465
})
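
/*
 * Illustration (editor's sketch, not part of the upstream source):
 * asm_safe() wraps a single instruction so that a fault is reported as
 * X86EMUL_UNHANDLEABLE instead of taking the host down. A caller looks
 * something like the FXSAVE emulation later in this file:
 *
 *     rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
 *
 * where the empty first variadic argument supplies the comma so the
 * memory operand lands in the output-constraint list next to _fault.
 */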
466
467
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
468
enum x86_intercept intercept,
469
enum x86_intercept_stage stage)
470
{
471
struct x86_instruction_info info = {
472
.intercept = intercept,
473
.rep_prefix = ctxt->rep_prefix,
474
.modrm_mod = ctxt->modrm_mod,
475
.modrm_reg = ctxt->modrm_reg,
476
.modrm_rm = ctxt->modrm_rm,
477
.src_val = ctxt->src.val64,
478
.dst_val = ctxt->dst.val64,
479
.src_bytes = ctxt->src.bytes,
480
.dst_bytes = ctxt->dst.bytes,
481
.src_type = ctxt->src.type,
482
.dst_type = ctxt->dst.type,
483
.ad_bytes = ctxt->ad_bytes,
484
.rip = ctxt->eip,
485
.next_rip = ctxt->_eip,
486
};
487
488
return ctxt->ops->intercept(ctxt, &info, stage);
489
}
490
491
static void assign_masked(ulong *dest, ulong src, ulong mask)
492
{
493
*dest = (*dest & ~mask) | (src & mask);
494
}
495
496
static void assign_register(unsigned long *reg, u64 val, int bytes)
497
{
498
/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
499
switch (bytes) {
500
case 1:
501
*(u8 *)reg = (u8)val;
502
break;
503
case 2:
504
*(u16 *)reg = (u16)val;
505
break;
506
case 4:
507
*reg = (u32)val;
508
break; /* 64b: zero-extend */
509
case 8:
510
*reg = val;
511
break;
512
}
513
}
514
515
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
516
{
517
return (1UL << (ctxt->ad_bytes << 3)) - 1;
518
}
519
520
static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
521
{
522
u16 sel;
523
struct desc_struct ss;
524
525
if (ctxt->mode == X86EMUL_MODE_PROT64)
526
return ~0UL;
527
ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
528
return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
529
}
530
531
static int stack_size(struct x86_emulate_ctxt *ctxt)
532
{
533
return (__fls(stack_mask(ctxt)) + 1) >> 3;
534
}
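
/*
 * Worked example (editor's addition): with a 16-bit stack segment
 * (ss.d == 0) stack_mask() yields ~0U >> 16 == 0xffff and stack_size()
 * is 2; with ss.d == 1 the mask is 0xffffffff and stack_size() is 4; in
 * 64-bit mode the mask is ~0UL and stack_size() is 8.
 */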
535
536
/* Access/update address held in a register, based on addressing mode. */
537
static inline unsigned long
538
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
539
{
540
if (ctxt->ad_bytes == sizeof(unsigned long))
541
return reg;
542
else
543
return reg & ad_mask(ctxt);
544
}
545
546
static inline unsigned long
547
register_address(struct x86_emulate_ctxt *ctxt, int reg)
548
{
549
return address_mask(ctxt, reg_read(ctxt, reg));
550
}
551
552
static void masked_increment(ulong *reg, ulong mask, int inc)
553
{
554
assign_masked(reg, *reg + inc, mask);
555
}
556
557
static inline void
558
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
559
{
560
ulong *preg = reg_rmw(ctxt, reg);
561
562
assign_register(preg, *preg + inc, ctxt->ad_bytes);
563
}
564
565
static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
566
{
567
masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
568
}
569
570
static u32 desc_limit_scaled(struct desc_struct *desc)
571
{
572
u32 limit = get_desc_limit(desc);
573
574
return desc->g ? (limit << 12) | 0xfff : limit;
575
}
576
577
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
578
{
579
if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
580
return 0;
581
582
return ctxt->ops->get_cached_segment_base(ctxt, seg);
583
}
584
585
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
586
u32 error, bool valid)
587
{
588
if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
589
return X86EMUL_UNHANDLEABLE;
590
591
ctxt->exception.vector = vec;
592
ctxt->exception.error_code = error;
593
ctxt->exception.error_code_valid = valid;
594
return X86EMUL_PROPAGATE_FAULT;
595
}
596
597
static int emulate_db(struct x86_emulate_ctxt *ctxt)
598
{
599
return emulate_exception(ctxt, DB_VECTOR, 0, false);
600
}
601
602
static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
603
{
604
return emulate_exception(ctxt, GP_VECTOR, err, true);
605
}
606
607
static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
608
{
609
return emulate_exception(ctxt, SS_VECTOR, err, true);
610
}
611
612
static int emulate_ud(struct x86_emulate_ctxt *ctxt)
613
{
614
return emulate_exception(ctxt, UD_VECTOR, 0, false);
615
}
616
617
static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
618
{
619
return emulate_exception(ctxt, TS_VECTOR, err, true);
620
}
621
622
static int emulate_de(struct x86_emulate_ctxt *ctxt)
623
{
624
return emulate_exception(ctxt, DE_VECTOR, 0, false);
625
}
626
627
static int emulate_nm(struct x86_emulate_ctxt *ctxt)
628
{
629
return emulate_exception(ctxt, NM_VECTOR, 0, false);
630
}
631
632
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
633
{
634
u16 selector;
635
struct desc_struct desc;
636
637
ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
638
return selector;
639
}
640
641
static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
642
unsigned seg)
643
{
644
u16 dummy;
645
u32 base3;
646
struct desc_struct desc;
647
648
ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
649
ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
650
}
651
652
static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
653
{
654
return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
655
}
656
657
static inline bool emul_is_noncanonical_address(u64 la,
658
struct x86_emulate_ctxt *ctxt,
659
unsigned int flags)
660
{
661
return !ctxt->ops->is_canonical_addr(ctxt, la, flags);
662
}
663
664
/*
665
* x86 defines three classes of vector instructions: explicitly
666
* aligned, explicitly unaligned, and the rest, which change behaviour
667
* depending on whether they're AVX encoded or not.
668
*
669
* Also included is CMPXCHG16B which is not a vector instruction, yet it is
670
* subject to the same check. FXSAVE and FXRSTOR are checked here too as their
671
* 512 bytes of data must be aligned to a 16 byte boundary.
672
*/
673
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
674
{
675
u64 alignment = ctxt->d & AlignMask;
676
677
if (likely(size < 16))
678
return 1;
679
680
switch (alignment) {
681
case Unaligned:
682
case Avx:
683
return 1;
684
case Aligned16:
685
return 16;
686
case Aligned:
687
default:
688
return size;
689
}
690
}
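
/*
 * Worked example (editor's addition): a 16-byte MOVDQA access (Aligned)
 * must sit on a 16-byte boundary, MOVDQU (Unaligned) and AVX-encoded
 * forms never fault on alignment here, and FXSAVE/FXRSTOR (Aligned16)
 * only need 16-byte alignment even though the access is larger. The
 * caller below applies the result as la & (insn_alignment(ctxt, size) - 1).
 */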
691
692
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
693
struct segmented_address addr,
694
unsigned *max_size, unsigned size,
695
enum x86emul_mode mode, ulong *linear,
696
unsigned int flags)
697
{
698
struct desc_struct desc;
699
bool usable;
700
ulong la;
701
u32 lim;
702
u16 sel;
703
u8 va_bits;
704
705
la = seg_base(ctxt, addr.seg) + addr.ea;
706
*max_size = 0;
707
switch (mode) {
708
case X86EMUL_MODE_PROT64:
709
*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
710
va_bits = ctxt_virt_addr_bits(ctxt);
711
if (!__is_canonical_address(la, va_bits))
712
goto bad;
713
714
*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
715
if (size > *max_size)
716
goto bad;
717
break;
718
default:
719
*linear = la = (u32)la;
720
usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
721
addr.seg);
722
if (!usable)
723
goto bad;
724
/* code segment in protected mode or read-only data segment */
725
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
726
(flags & X86EMUL_F_WRITE))
727
goto bad;
728
/* unreadable code segment */
729
if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
730
goto bad;
731
lim = desc_limit_scaled(&desc);
732
if (!(desc.type & 8) && (desc.type & 4)) {
733
/* expand-down segment */
734
if (addr.ea <= lim)
735
goto bad;
736
lim = desc.d ? 0xffffffff : 0xffff;
737
}
738
if (addr.ea > lim)
739
goto bad;
740
if (lim == 0xffffffff)
741
*max_size = ~0u;
742
else {
743
*max_size = (u64)lim + 1 - addr.ea;
744
if (size > *max_size)
745
goto bad;
746
}
747
break;
748
}
749
if (la & (insn_alignment(ctxt, size) - 1))
750
return emulate_gp(ctxt, 0);
751
return X86EMUL_CONTINUE;
752
bad:
753
if (addr.seg == VCPU_SREG_SS)
754
return emulate_ss(ctxt, 0);
755
else
756
return emulate_gp(ctxt, 0);
757
}
758
759
static int linearize(struct x86_emulate_ctxt *ctxt,
760
struct segmented_address addr,
761
unsigned size, bool write,
762
ulong *linear)
763
{
764
unsigned max_size;
765
return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
766
write ? X86EMUL_F_WRITE : 0);
767
}
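
/*
 * Illustration (editor's sketch, not part of the upstream source):
 * linearize() is the common "segmented -> linear" step, e.g. a 4-byte
 * write through a segmented address is checked with
 *
 *     rc = linearize(ctxt, addr, 4, true, &linear);
 *
 * which in 16/32-bit modes adds the cached segment base and checks the
 * descriptor type and limit, while 64-bit mode replaces those checks
 * with a canonical-address check; the alignment check applies in all
 * modes.
 */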
768
769
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
770
{
771
ulong linear;
772
int rc;
773
unsigned max_size;
774
struct segmented_address addr = { .seg = VCPU_SREG_CS,
775
.ea = dst };
776
777
if (ctxt->op_bytes != sizeof(unsigned long))
778
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
779
rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
780
X86EMUL_F_FETCH);
781
if (rc == X86EMUL_CONTINUE)
782
ctxt->_eip = addr.ea;
783
return rc;
784
}
785
786
static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
787
{
788
u64 efer;
789
struct desc_struct cs;
790
u16 selector;
791
u32 base3;
792
793
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
794
795
if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
796
/* Real mode. cpu must not have long mode active */
797
if (efer & EFER_LMA)
798
return X86EMUL_UNHANDLEABLE;
799
ctxt->mode = X86EMUL_MODE_REAL;
800
return X86EMUL_CONTINUE;
801
}
802
803
if (ctxt->eflags & X86_EFLAGS_VM) {
804
/* Protected/VM86 mode. cpu must not have long mode active */
805
if (efer & EFER_LMA)
806
return X86EMUL_UNHANDLEABLE;
807
ctxt->mode = X86EMUL_MODE_VM86;
808
return X86EMUL_CONTINUE;
809
}
810
811
if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
812
return X86EMUL_UNHANDLEABLE;
813
814
if (efer & EFER_LMA) {
815
if (cs.l) {
816
/* Proper long mode */
817
ctxt->mode = X86EMUL_MODE_PROT64;
818
} else if (cs.d) {
819
/* 32-bit compatibility mode */
820
ctxt->mode = X86EMUL_MODE_PROT32;
821
} else {
822
ctxt->mode = X86EMUL_MODE_PROT16;
823
}
824
} else {
825
/* Legacy 32 bit / 16 bit mode */
826
ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
827
}
828
829
return X86EMUL_CONTINUE;
830
}
831
832
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
833
{
834
return assign_eip(ctxt, dst);
835
}
836
837
static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
838
{
839
int rc = emulator_recalc_and_set_mode(ctxt);
840
841
if (rc != X86EMUL_CONTINUE)
842
return rc;
843
844
return assign_eip(ctxt, dst);
845
}
846
847
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
848
{
849
return assign_eip_near(ctxt, ctxt->_eip + rel);
850
}
851
852
static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
853
void *data, unsigned size)
854
{
855
return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
856
}
857
858
static int linear_write_system(struct x86_emulate_ctxt *ctxt,
859
ulong linear, void *data,
860
unsigned int size)
861
{
862
return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
863
}
864
865
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
866
struct segmented_address addr,
867
void *data,
868
unsigned size)
869
{
870
int rc;
871
ulong linear;
872
873
rc = linearize(ctxt, addr, size, false, &linear);
874
if (rc != X86EMUL_CONTINUE)
875
return rc;
876
return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
877
}
878
879
static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
880
struct segmented_address addr,
881
void *data,
882
unsigned int size)
883
{
884
int rc;
885
ulong linear;
886
887
rc = linearize(ctxt, addr, size, true, &linear);
888
if (rc != X86EMUL_CONTINUE)
889
return rc;
890
return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
891
}
892
893
/*
894
* Prefetch the remaining bytes of the instruction without crossing page
895
* boundary if they are not in fetch_cache yet.
896
*/
897
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
898
{
899
int rc;
900
unsigned size, max_size;
901
unsigned long linear;
902
int cur_size = ctxt->fetch.end - ctxt->fetch.data;
903
struct segmented_address addr = { .seg = VCPU_SREG_CS,
904
.ea = ctxt->eip + cur_size };
905
906
/*
907
* We do not know exactly how many bytes will be needed, and
908
* __linearize is expensive, so fetch as much as possible. We
909
* just have to avoid going beyond the 15 byte limit, the end
910
* of the segment, or the end of the page.
911
*
912
* __linearize is called with size 0 so that it does not do any
913
* boundary check itself. Instead, we use max_size to check
914
* against op_size.
915
*/
916
rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
917
X86EMUL_F_FETCH);
918
if (unlikely(rc != X86EMUL_CONTINUE))
919
return rc;
920
921
size = min_t(unsigned, 15UL ^ cur_size, max_size);
922
size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
923
924
/*
925
* One instruction can only straddle two pages,
926
* and one has been loaded at the beginning of
927
* x86_decode_insn. So, if we still don't have enough
928
* bytes, we must have hit the 15-byte length limit.
929
*/
930
if (unlikely(size < op_size))
931
return emulate_gp(ctxt, 0);
932
933
rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
934
size, &ctxt->exception);
935
if (unlikely(rc != X86EMUL_CONTINUE))
936
return rc;
937
ctxt->fetch.end += size;
938
return X86EMUL_CONTINUE;
939
}
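
/*
 * Illustration (editor's addition): because cur_size is at most 15
 * here, "15UL ^ cur_size" is simply a branch-free way of writing
 * 15 - cur_size, i.e. how many bytes may still be fetched before the
 * architectural 15-byte instruction-length limit. For example, with 4
 * bytes already cached and the fetch starting 10 bytes before a page
 * boundary, the prefetch reads min(11, max_size, 10) bytes.
 */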
940
941
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
942
unsigned size)
943
{
944
unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
945
946
if (unlikely(done_size < size))
947
return __do_insn_fetch_bytes(ctxt, size - done_size);
948
else
949
return X86EMUL_CONTINUE;
950
}
951
952
/* Fetch next part of the instruction being emulated. */
953
#define insn_fetch(_type, _ctxt) \
954
({ _type _x; \
955
\
956
rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
957
if (rc != X86EMUL_CONTINUE) \
958
goto done; \
959
ctxt->_eip += sizeof(_type); \
960
memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
961
ctxt->fetch.ptr += sizeof(_type); \
962
_x; \
963
})
964
965
#define insn_fetch_arr(_arr, _size, _ctxt) \
966
({ \
967
rc = do_insn_fetch_bytes(_ctxt, _size); \
968
if (rc != X86EMUL_CONTINUE) \
969
goto done; \
970
ctxt->_eip += (_size); \
971
memcpy(_arr, ctxt->fetch.ptr, _size); \
972
ctxt->fetch.ptr += (_size); \
973
})
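
/*
 * Illustration (editor's sketch, not part of the upstream source): the
 * decoder (x86_decode_insn, later in this file) consumes the
 * instruction stream through these helpers, e.g. reading the ModRM
 * byte as
 *
 *     ctxt->modrm = insn_fetch(u8, ctxt);
 *
 * Both macros jump to the enclosing function's "done" label on a fetch
 * failure, so they can only be used where such a label exists.
 */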
974
975
/*
976
* Given the 'reg' portion of a ModRM byte, and a register block, return a
977
* pointer into the block that addresses the relevant register.
978
* The @byteop flag selects whether the legacy high-byte registers AH, CH, DH and BH may be decoded.
979
*/
980
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
981
int byteop)
982
{
983
void *p;
984
int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
985
986
if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
987
p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
988
else
989
p = reg_rmw(ctxt, modrm_reg);
990
return p;
991
}
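
/*
 * Worked example (editor's addition): without a REX prefix, byte-sized
 * encodings 4-7 select the legacy high-byte registers, so modrm_reg == 4
 * resolves to AH, i.e. byte 1 of the RAX slot; with any REX prefix the
 * same encoding selects SPL and the plain register slot is returned.
 */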
992
993
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
994
struct segmented_address addr,
995
u16 *size, unsigned long *address, int op_bytes)
996
{
997
int rc;
998
999
if (op_bytes == 2)
1000
op_bytes = 3;
1001
*address = 0;
1002
rc = segmented_read_std(ctxt, addr, size, 2);
1003
if (rc != X86EMUL_CONTINUE)
1004
return rc;
1005
addr.ea += 2;
1006
rc = segmented_read_std(ctxt, addr, address, op_bytes);
1007
return rc;
1008
}
1009
1010
FASTOP2(add);
1011
FASTOP2(or);
1012
FASTOP2(adc);
1013
FASTOP2(sbb);
1014
FASTOP2(and);
1015
FASTOP2(sub);
1016
FASTOP2(xor);
1017
FASTOP2(cmp);
1018
FASTOP2(test);
1019
1020
FASTOP1SRC2(mul, mul_ex);
1021
FASTOP1SRC2(imul, imul_ex);
1022
FASTOP1SRC2EX(div, div_ex);
1023
FASTOP1SRC2EX(idiv, idiv_ex);
1024
1025
FASTOP3WCL(shld);
1026
FASTOP3WCL(shrd);
1027
1028
FASTOP2W(imul);
1029
1030
FASTOP1(not);
1031
FASTOP1(neg);
1032
FASTOP1(inc);
1033
FASTOP1(dec);
1034
1035
FASTOP2CL(rol);
1036
FASTOP2CL(ror);
1037
FASTOP2CL(rcl);
1038
FASTOP2CL(rcr);
1039
FASTOP2CL(shl);
1040
FASTOP2CL(shr);
1041
FASTOP2CL(sar);
1042
1043
FASTOP2W(bsf);
1044
FASTOP2W(bsr);
1045
FASTOP2W(bt);
1046
FASTOP2W(bts);
1047
FASTOP2W(btr);
1048
FASTOP2W(btc);
1049
1050
FASTOP2(xadd);
1051
1052
FASTOP2R(cmp, cmp_r);
1053
1054
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1055
{
1056
/* If src is zero, do not writeback, but update flags */
1057
if (ctxt->src.val == 0)
1058
ctxt->dst.type = OP_NONE;
1059
return fastop(ctxt, em_bsf);
1060
}
1061
1062
static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1063
{
1064
/* If src is zero, do not writeback, but update flags */
1065
if (ctxt->src.val == 0)
1066
ctxt->dst.type = OP_NONE;
1067
return fastop(ctxt, em_bsr);
1068
}
1069
1070
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1071
{
1072
u8 rc;
1073
void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
1074
1075
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1076
asm("push %[flags]; popf; " CALL_NOSPEC
1077
: "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags));
1078
return rc;
1079
}
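
/*
 * Worked example (editor's addition): the sixteen FOP_SETCC stubs above
 * are laid out in x86 condition-code order, so for JE/JZ (condition
 * nibble 0x4) test_cc() executes em_setcc + 4 * FASTOP_SIZE, i.e. the
 * setz stub, after loading the guest's arithmetic flags via push/popf.
 */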
1080
1081
static void fetch_register_operand(struct operand *op)
1082
{
1083
switch (op->bytes) {
1084
case 1:
1085
op->val = *(u8 *)op->addr.reg;
1086
break;
1087
case 2:
1088
op->val = *(u16 *)op->addr.reg;
1089
break;
1090
case 4:
1091
op->val = *(u32 *)op->addr.reg;
1092
break;
1093
case 8:
1094
op->val = *(u64 *)op->addr.reg;
1095
break;
1096
}
1097
}
1098
1099
static int em_fninit(struct x86_emulate_ctxt *ctxt)
1100
{
1101
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1102
return emulate_nm(ctxt);
1103
1104
kvm_fpu_get();
1105
asm volatile("fninit");
1106
kvm_fpu_put();
1107
return X86EMUL_CONTINUE;
1108
}
1109
1110
static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1111
{
1112
u16 fcw;
1113
1114
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1115
return emulate_nm(ctxt);
1116
1117
kvm_fpu_get();
1118
asm volatile("fnstcw %0": "+m"(fcw));
1119
kvm_fpu_put();
1120
1121
ctxt->dst.val = fcw;
1122
1123
return X86EMUL_CONTINUE;
1124
}
1125
1126
static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1127
{
1128
u16 fsw;
1129
1130
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1131
return emulate_nm(ctxt);
1132
1133
kvm_fpu_get();
1134
asm volatile("fnstsw %0": "+m"(fsw));
1135
kvm_fpu_put();
1136
1137
ctxt->dst.val = fsw;
1138
1139
return X86EMUL_CONTINUE;
1140
}
1141
1142
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1143
struct operand *op)
1144
{
1145
unsigned int reg;
1146
1147
if (ctxt->d & ModRM)
1148
reg = ctxt->modrm_reg;
1149
else
1150
reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1151
1152
if (ctxt->d & Sse) {
1153
op->type = OP_XMM;
1154
op->bytes = 16;
1155
op->addr.xmm = reg;
1156
kvm_read_sse_reg(reg, &op->vec_val);
1157
return;
1158
}
1159
if (ctxt->d & Mmx) {
1160
reg &= 7;
1161
op->type = OP_MM;
1162
op->bytes = 8;
1163
op->addr.mm = reg;
1164
return;
1165
}
1166
1167
op->type = OP_REG;
1168
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1169
op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1170
1171
fetch_register_operand(op);
1172
op->orig_val = op->val;
1173
}
1174
1175
static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1176
{
1177
if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1178
ctxt->modrm_seg = VCPU_SREG_SS;
1179
}
1180
1181
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1182
struct operand *op)
1183
{
1184
u8 sib;
1185
int index_reg, base_reg, scale;
1186
int rc = X86EMUL_CONTINUE;
1187
ulong modrm_ea = 0;
1188
1189
ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1190
index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1191
base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1192
1193
ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1194
ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1195
ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1196
ctxt->modrm_seg = VCPU_SREG_DS;
1197
1198
if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1199
op->type = OP_REG;
1200
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1201
op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1202
ctxt->d & ByteOp);
1203
if (ctxt->d & Sse) {
1204
op->type = OP_XMM;
1205
op->bytes = 16;
1206
op->addr.xmm = ctxt->modrm_rm;
1207
kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1208
return rc;
1209
}
1210
if (ctxt->d & Mmx) {
1211
op->type = OP_MM;
1212
op->bytes = 8;
1213
op->addr.mm = ctxt->modrm_rm & 7;
1214
return rc;
1215
}
1216
fetch_register_operand(op);
1217
return rc;
1218
}
1219
1220
op->type = OP_MEM;
1221
1222
if (ctxt->ad_bytes == 2) {
1223
unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1224
unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1225
unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1226
unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1227
1228
/* 16-bit ModR/M decode. */
1229
switch (ctxt->modrm_mod) {
1230
case 0:
1231
if (ctxt->modrm_rm == 6)
1232
modrm_ea += insn_fetch(u16, ctxt);
1233
break;
1234
case 1:
1235
modrm_ea += insn_fetch(s8, ctxt);
1236
break;
1237
case 2:
1238
modrm_ea += insn_fetch(u16, ctxt);
1239
break;
1240
}
1241
switch (ctxt->modrm_rm) {
1242
case 0:
1243
modrm_ea += bx + si;
1244
break;
1245
case 1:
1246
modrm_ea += bx + di;
1247
break;
1248
case 2:
1249
modrm_ea += bp + si;
1250
break;
1251
case 3:
1252
modrm_ea += bp + di;
1253
break;
1254
case 4:
1255
modrm_ea += si;
1256
break;
1257
case 5:
1258
modrm_ea += di;
1259
break;
1260
case 6:
1261
if (ctxt->modrm_mod != 0)
1262
modrm_ea += bp;
1263
break;
1264
case 7:
1265
modrm_ea += bx;
1266
break;
1267
}
1268
if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1269
(ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1270
ctxt->modrm_seg = VCPU_SREG_SS;
1271
modrm_ea = (u16)modrm_ea;
1272
} else {
1273
/* 32/64-bit ModR/M decode. */
1274
if ((ctxt->modrm_rm & 7) == 4) {
1275
sib = insn_fetch(u8, ctxt);
1276
index_reg |= (sib >> 3) & 7;
1277
base_reg |= sib & 7;
1278
scale = sib >> 6;
1279
1280
if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1281
modrm_ea += insn_fetch(s32, ctxt);
1282
else {
1283
modrm_ea += reg_read(ctxt, base_reg);
1284
adjust_modrm_seg(ctxt, base_reg);
1285
/* Increment ESP on POP [ESP] */
1286
if ((ctxt->d & IncSP) &&
1287
base_reg == VCPU_REGS_RSP)
1288
modrm_ea += ctxt->op_bytes;
1289
}
1290
if (index_reg != 4)
1291
modrm_ea += reg_read(ctxt, index_reg) << scale;
1292
} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1293
modrm_ea += insn_fetch(s32, ctxt);
1294
if (ctxt->mode == X86EMUL_MODE_PROT64)
1295
ctxt->rip_relative = 1;
1296
} else {
1297
base_reg = ctxt->modrm_rm;
1298
modrm_ea += reg_read(ctxt, base_reg);
1299
adjust_modrm_seg(ctxt, base_reg);
1300
}
1301
switch (ctxt->modrm_mod) {
1302
case 1:
1303
modrm_ea += insn_fetch(s8, ctxt);
1304
break;
1305
case 2:
1306
modrm_ea += insn_fetch(s32, ctxt);
1307
break;
1308
}
1309
}
1310
op->addr.mem.ea = modrm_ea;
1311
if (ctxt->ad_bytes != 8)
1312
ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1313
1314
done:
1315
return rc;
1316
}
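
/*
 * Worked examples (editor's addition): with 16-bit addressing, ModRM
 * 0x47 (mod=1, reg=0, rm=7) decodes to "BX + disp8" with the default DS
 * segment; with 32-bit addressing, rm=4 pulls in a SIB byte, so ModRM
 * 0x04 followed by SIB 0x88 decodes to "EAX + ECX*4"; and in 64-bit
 * mode, mod=0 with rm=5 is the RIP-relative disp32 form flagged via
 * rip_relative above.
 */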
1317
1318
static int decode_abs(struct x86_emulate_ctxt *ctxt,
1319
struct operand *op)
1320
{
1321
int rc = X86EMUL_CONTINUE;
1322
1323
op->type = OP_MEM;
1324
switch (ctxt->ad_bytes) {
1325
case 2:
1326
op->addr.mem.ea = insn_fetch(u16, ctxt);
1327
break;
1328
case 4:
1329
op->addr.mem.ea = insn_fetch(u32, ctxt);
1330
break;
1331
case 8:
1332
op->addr.mem.ea = insn_fetch(u64, ctxt);
1333
break;
1334
}
1335
done:
1336
return rc;
1337
}
1338
1339
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1340
{
1341
long sv = 0, mask;
1342
1343
if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1344
mask = ~((long)ctxt->dst.bytes * 8 - 1);
1345
1346
if (ctxt->src.bytes == 2)
1347
sv = (s16)ctxt->src.val & (s16)mask;
1348
else if (ctxt->src.bytes == 4)
1349
sv = (s32)ctxt->src.val & (s32)mask;
1350
else
1351
sv = (s64)ctxt->src.val & (s64)mask;
1352
1353
ctxt->dst.addr.mem.ea = address_mask(ctxt,
1354
ctxt->dst.addr.mem.ea + (sv >> 3));
1355
}
1356
1357
/* only subword offset */
1358
ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1359
}
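
/*
 * Worked example (editor's addition): for "bt word [mem], reg" with a
 * source value of 100, the mask is ~15, so the effective address is
 * advanced by (100 & ~15) >> 3 == 12 bytes and the remaining bit offset
 * becomes 100 & 15 == 4, i.e. bit 4 of the word twelve bytes past the
 * original operand, matching the architectural bit-string addressing.
 */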
1360
1361
static int read_emulated(struct x86_emulate_ctxt *ctxt,
1362
unsigned long addr, void *dest, unsigned size)
1363
{
1364
int rc;
1365
struct read_cache *mc = &ctxt->mem_read;
1366
1367
if (mc->pos < mc->end)
1368
goto read_cached;
1369
1370
if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
1371
return X86EMUL_UNHANDLEABLE;
1372
1373
rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1374
&ctxt->exception);
1375
if (rc != X86EMUL_CONTINUE)
1376
return rc;
1377
1378
mc->end += size;
1379
1380
read_cached:
1381
memcpy(dest, mc->data + mc->pos, size);
1382
mc->pos += size;
1383
return X86EMUL_CONTINUE;
1384
}
1385
1386
static int segmented_read(struct x86_emulate_ctxt *ctxt,
1387
struct segmented_address addr,
1388
void *data,
1389
unsigned size)
1390
{
1391
int rc;
1392
ulong linear;
1393
1394
rc = linearize(ctxt, addr, size, false, &linear);
1395
if (rc != X86EMUL_CONTINUE)
1396
return rc;
1397
return read_emulated(ctxt, linear, data, size);
1398
}
1399
1400
static int segmented_write(struct x86_emulate_ctxt *ctxt,
1401
struct segmented_address addr,
1402
const void *data,
1403
unsigned size)
1404
{
1405
int rc;
1406
ulong linear;
1407
1408
rc = linearize(ctxt, addr, size, true, &linear);
1409
if (rc != X86EMUL_CONTINUE)
1410
return rc;
1411
return ctxt->ops->write_emulated(ctxt, linear, data, size,
1412
&ctxt->exception);
1413
}
1414
1415
static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1416
struct segmented_address addr,
1417
const void *orig_data, const void *data,
1418
unsigned size)
1419
{
1420
int rc;
1421
ulong linear;
1422
1423
rc = linearize(ctxt, addr, size, true, &linear);
1424
if (rc != X86EMUL_CONTINUE)
1425
return rc;
1426
return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1427
size, &ctxt->exception);
1428
}
1429
1430
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1431
unsigned int size, unsigned short port,
1432
void *dest)
1433
{
1434
struct read_cache *rc = &ctxt->io_read;
1435
1436
if (rc->pos == rc->end) { /* refill pio read ahead */
1437
unsigned int in_page, n;
1438
unsigned int count = ctxt->rep_prefix ?
1439
address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1440
in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1441
offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1442
PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1443
n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1444
if (n == 0)
1445
n = 1;
1446
rc->pos = rc->end = 0;
1447
if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1448
return 0;
1449
rc->end = n * size;
1450
}
1451
1452
if (ctxt->rep_prefix && (ctxt->d & String) &&
1453
!(ctxt->eflags & X86_EFLAGS_DF)) {
1454
ctxt->dst.data = rc->data + rc->pos;
1455
ctxt->dst.type = OP_MEM_STR;
1456
ctxt->dst.count = (rc->end - rc->pos) / size;
1457
rc->pos = rc->end;
1458
} else {
1459
memcpy(dest, rc->data + rc->pos, size);
1460
rc->pos += size;
1461
}
1462
return 1;
1463
}
1464
1465
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1466
u16 index, struct desc_struct *desc)
1467
{
1468
struct desc_ptr dt;
1469
ulong addr;
1470
1471
ctxt->ops->get_idt(ctxt, &dt);
1472
1473
if (dt.size < index * 8 + 7)
1474
return emulate_gp(ctxt, index << 3 | 0x2);
1475
1476
addr = dt.address + index * 8;
1477
return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1478
}
1479
1480
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1481
u16 selector, struct desc_ptr *dt)
1482
{
1483
const struct x86_emulate_ops *ops = ctxt->ops;
1484
u32 base3 = 0;
1485
1486
if (selector & 1 << 2) {
1487
struct desc_struct desc;
1488
u16 sel;
1489
1490
memset(dt, 0, sizeof(*dt));
1491
if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1492
VCPU_SREG_LDTR))
1493
return;
1494
1495
dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1496
dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1497
} else
1498
ops->get_gdt(ctxt, dt);
1499
}
1500
1501
static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1502
u16 selector, ulong *desc_addr_p)
1503
{
1504
struct desc_ptr dt;
1505
u16 index = selector >> 3;
1506
ulong addr;
1507
1508
get_descriptor_table_ptr(ctxt, selector, &dt);
1509
1510
if (dt.size < index * 8 + 7)
1511
return emulate_gp(ctxt, selector & 0xfffc);
1512
1513
addr = dt.address + index * 8;
1514
1515
#ifdef CONFIG_X86_64
1516
if (addr >> 32 != 0) {
1517
u64 efer = 0;
1518
1519
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1520
if (!(efer & EFER_LMA))
1521
addr &= (u32)-1;
1522
}
1523
#endif
1524
1525
*desc_addr_p = addr;
1526
return X86EMUL_CONTINUE;
1527
}
1528
1529
/* allowed just for 8 bytes segments */
1530
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1531
u16 selector, struct desc_struct *desc,
1532
ulong *desc_addr_p)
1533
{
1534
int rc;
1535
1536
rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1537
if (rc != X86EMUL_CONTINUE)
1538
return rc;
1539
1540
return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1541
}
1542
1543
/* allowed just for 8 bytes segments */
1544
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1545
u16 selector, struct desc_struct *desc)
1546
{
1547
int rc;
1548
ulong addr;
1549
1550
rc = get_descriptor_ptr(ctxt, selector, &addr);
1551
if (rc != X86EMUL_CONTINUE)
1552
return rc;
1553
1554
return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1555
}
1556
1557
static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl)
1558
{
1559
const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET;
1560
u64 efer = 0, cet = 0, ssp = 0;
1561
1562
if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET))
1563
return false;
1564
1565
if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer))
1566
return true;
1567
1568
/* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */
1569
if (!(efer & EFER_LMA))
1570
return false;
1571
1572
if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet))
1573
return true;
1574
1575
if (!(cet & CET_SHSTK_EN))
1576
return false;
1577
1578
if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp))
1579
return true;
1580
1581
/*
1582
* On transfer from 64-bit mode to compatibility mode, SSP[63:32] must
1583
* be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode.
1584
*/
1585
return ssp >> 32;
1586
}
1587
1588
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1589
u16 selector, int seg, u8 cpl,
1590
enum x86_transfer_type transfer,
1591
struct desc_struct *desc)
1592
{
1593
struct desc_struct seg_desc, old_desc;
1594
u8 dpl, rpl;
1595
unsigned err_vec = GP_VECTOR;
1596
u32 err_code = 0;
1597
bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1598
ulong desc_addr;
1599
int ret;
1600
u16 dummy;
1601
u32 base3 = 0;
1602
1603
memset(&seg_desc, 0, sizeof(seg_desc));
1604
1605
if (ctxt->mode == X86EMUL_MODE_REAL) {
1606
/* set real mode segment descriptor (keep limit etc. for
1607
* unreal mode) */
1608
ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1609
set_desc_base(&seg_desc, selector << 4);
1610
goto load;
1611
} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1612
/* VM86 needs a clean new segment descriptor */
1613
set_desc_base(&seg_desc, selector << 4);
1614
set_desc_limit(&seg_desc, 0xffff);
1615
seg_desc.type = 3;
1616
seg_desc.p = 1;
1617
seg_desc.s = 1;
1618
seg_desc.dpl = 3;
1619
goto load;
1620
}
1621
1622
rpl = selector & 3;
1623
1624
/* TR should be in GDT only */
1625
if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1626
goto exception;
1627
1628
/* NULL selector is not valid for TR, CS and (except for long mode) SS */
1629
if (null_selector) {
1630
if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1631
goto exception;
1632
1633
if (seg == VCPU_SREG_SS) {
1634
if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1635
goto exception;
1636
1637
/*
1638
* ctxt->ops->set_segment expects the CPL to be in
1639
* SS.DPL, so fake an expand-up 32-bit data segment.
1640
*/
1641
seg_desc.type = 3;
1642
seg_desc.p = 1;
1643
seg_desc.s = 1;
1644
seg_desc.dpl = cpl;
1645
seg_desc.d = 1;
1646
seg_desc.g = 1;
1647
}
1648
1649
/* Skip all following checks */
1650
goto load;
1651
}
1652
1653
ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1654
if (ret != X86EMUL_CONTINUE)
1655
return ret;
1656
1657
err_code = selector & 0xfffc;
1658
err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1659
GP_VECTOR;
1660
1661
/* can't load system descriptor into segment selector */
1662
if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1663
if (transfer == X86_TRANSFER_CALL_JMP)
1664
return X86EMUL_UNHANDLEABLE;
1665
goto exception;
1666
}
1667
1668
dpl = seg_desc.dpl;
1669
1670
switch (seg) {
1671
case VCPU_SREG_SS:
1672
/*
1673
* segment is not a writable data segment or segment
1674
* selector's RPL != CPL or DPL != CPL
1675
*/
1676
if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1677
goto exception;
1678
break;
1679
case VCPU_SREG_CS:
1680
/*
1681
* KVM uses "none" when loading CS as part of emulating Real
1682
* Mode exceptions and IRET (handled above). In all other
1683
* cases, loading CS without a control transfer is a KVM bug.
1684
*/
1685
if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
1686
goto exception;
1687
1688
if (!(seg_desc.type & 8))
1689
goto exception;
1690
1691
if (transfer == X86_TRANSFER_RET) {
1692
/* RET can never return to an inner privilege level. */
1693
if (rpl < cpl)
1694
goto exception;
1695
/* Outer-privilege level return is not implemented */
1696
if (rpl > cpl)
1697
return X86EMUL_UNHANDLEABLE;
1698
}
1699
if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
1700
if (seg_desc.type & 4) {
1701
/* conforming */
1702
if (dpl > rpl)
1703
goto exception;
1704
} else {
1705
/* nonconforming */
1706
if (dpl != rpl)
1707
goto exception;
1708
}
1709
} else { /* X86_TRANSFER_CALL_JMP */
1710
if (seg_desc.type & 4) {
1711
/* conforming */
1712
if (dpl > cpl)
1713
goto exception;
1714
} else {
1715
/* nonconforming */
1716
if (rpl > cpl || dpl != cpl)
1717
goto exception;
1718
}
1719
}
1720
/* in long-mode d/b must be clear if l is set */
1721
if (seg_desc.d && seg_desc.l) {
1722
u64 efer = 0;
1723
1724
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1725
if (efer & EFER_LMA)
1726
goto exception;
1727
}
1728
if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) {
1729
err_code = 0;
1730
goto exception;
1731
}
1732
1733
/* CS(RPL) <- CPL */
1734
selector = (selector & 0xfffc) | cpl;
1735
break;
1736
case VCPU_SREG_TR:
1737
if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1738
goto exception;
1739
break;
1740
case VCPU_SREG_LDTR:
1741
if (seg_desc.s || seg_desc.type != 2)
1742
goto exception;
1743
break;
1744
default: /* DS, ES, FS, or GS */
1745
/*
1746
* segment is not a data or readable code segment or
1747
* ((segment is a data or nonconforming code segment)
1748
* and ((RPL > DPL) or (CPL > DPL)))
1749
*/
1750
if ((seg_desc.type & 0xa) == 0x8 ||
1751
(((seg_desc.type & 0xc) != 0xc) &&
1752
(rpl > dpl || cpl > dpl)))
1753
goto exception;
1754
break;
1755
}
1756
1757
if (!seg_desc.p) {
1758
err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1759
goto exception;
1760
}
1761
1762
if (seg_desc.s) {
1763
/* mark segment as accessed */
1764
if (!(seg_desc.type & 1)) {
1765
seg_desc.type |= 1;
1766
ret = write_segment_descriptor(ctxt, selector,
1767
&seg_desc);
1768
if (ret != X86EMUL_CONTINUE)
1769
return ret;
1770
}
1771
} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1772
ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1773
if (ret != X86EMUL_CONTINUE)
1774
return ret;
1775
if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1776
((u64)base3 << 32), ctxt,
1777
X86EMUL_F_DT_LOAD))
1778
return emulate_gp(ctxt, err_code);
1779
}
1780
1781
if (seg == VCPU_SREG_TR) {
1782
old_desc = seg_desc;
1783
seg_desc.type |= 2; /* busy */
1784
ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1785
sizeof(seg_desc), &ctxt->exception);
1786
if (ret != X86EMUL_CONTINUE)
1787
return ret;
1788
}
1789
load:
1790
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1791
if (desc)
1792
*desc = seg_desc;
1793
return X86EMUL_CONTINUE;
1794
exception:
1795
return emulate_exception(ctxt, err_vec, err_code, true);
1796
}
1797
1798
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1799
u16 selector, int seg)
1800
{
1801
u8 cpl = ctxt->ops->cpl(ctxt);
1802
1803
/*
1804
* None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1805
* they can load it at CPL<3 (Intel's manual says only LSS can,
1806
* but it's wrong).
1807
*
1808
* However, the Intel manual says that putting IST=1/DPL=3 in
1809
* an interrupt gate will result in SS=3 (the AMD manual instead
1810
* says it doesn't), so allow SS=3 in __load_segment_descriptor
1811
* and only forbid it here.
1812
*/
1813
if (seg == VCPU_SREG_SS && selector == 3 &&
1814
ctxt->mode == X86EMUL_MODE_PROT64)
1815
return emulate_exception(ctxt, GP_VECTOR, 0, true);
1816
1817
return __load_segment_descriptor(ctxt, selector, seg, cpl,
1818
X86_TRANSFER_NONE, NULL);
1819
}
1820
1821
static void write_register_operand(struct operand *op)
1822
{
1823
return assign_register(op->addr.reg, op->val, op->bytes);
1824
}
1825
1826
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1827
{
1828
switch (op->type) {
1829
case OP_REG:
1830
write_register_operand(op);
1831
break;
1832
case OP_MEM:
1833
if (ctxt->lock_prefix)
1834
return segmented_cmpxchg(ctxt,
1835
op->addr.mem,
1836
&op->orig_val,
1837
&op->val,
1838
op->bytes);
1839
else
1840
return segmented_write(ctxt,
1841
op->addr.mem,
1842
&op->val,
1843
op->bytes);
1844
case OP_MEM_STR:
1845
return segmented_write(ctxt,
1846
op->addr.mem,
1847
op->data,
1848
op->bytes * op->count);
1849
case OP_XMM:
1850
kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1851
break;
1852
case OP_MM:
1853
kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1854
break;
1855
case OP_NONE:
1856
/* no writeback */
1857
break;
1858
default:
1859
break;
1860
}
1861
return X86EMUL_CONTINUE;
1862
}
1863
1864
static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len)
1865
{
1866
struct segmented_address addr;
1867
1868
rsp_increment(ctxt, -len);
1869
addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1870
addr.seg = VCPU_SREG_SS;
1871
1872
return segmented_write(ctxt, addr, data, len);
1873
}
1874
1875
static int em_push(struct x86_emulate_ctxt *ctxt)
1876
{
1877
/* Disable writeback. */
1878
ctxt->dst.type = OP_NONE;
1879
return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1880
}
1881
1882
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1883
void *dest, int len)
1884
{
1885
int rc;
1886
struct segmented_address addr;
1887
1888
addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1889
addr.seg = VCPU_SREG_SS;
1890
rc = segmented_read(ctxt, addr, dest, len);
1891
if (rc != X86EMUL_CONTINUE)
1892
return rc;
1893
1894
rsp_increment(ctxt, len);
1895
return rc;
1896
}
1897
1898
static int em_pop(struct x86_emulate_ctxt *ctxt)
1899
{
1900
return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1901
}
1902
1903
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1904
void *dest, int len)
1905
{
1906
int rc;
1907
unsigned long val = 0;
1908
unsigned long change_mask;
1909
int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1910
int cpl = ctxt->ops->cpl(ctxt);
1911
1912
rc = emulate_pop(ctxt, &val, len);
1913
if (rc != X86EMUL_CONTINUE)
1914
return rc;
1915
1916
change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1917
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1918
X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1919
X86_EFLAGS_AC | X86_EFLAGS_ID;
1920
1921
switch(ctxt->mode) {
1922
case X86EMUL_MODE_PROT64:
1923
case X86EMUL_MODE_PROT32:
1924
case X86EMUL_MODE_PROT16:
1925
if (cpl == 0)
1926
change_mask |= X86_EFLAGS_IOPL;
1927
if (cpl <= iopl)
1928
change_mask |= X86_EFLAGS_IF;
1929
break;
1930
case X86EMUL_MODE_VM86:
1931
if (iopl < 3)
1932
return emulate_gp(ctxt, 0);
1933
change_mask |= X86_EFLAGS_IF;
1934
break;
1935
default: /* real mode */
1936
change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1937
break;
1938
}
1939
1940
*(unsigned long *)dest =
1941
(ctxt->eflags & ~change_mask) | (val & change_mask);
1942
1943
return rc;
1944
}
1945
1946
static int em_popf(struct x86_emulate_ctxt *ctxt)
1947
{
1948
ctxt->dst.type = OP_REG;
1949
ctxt->dst.addr.reg = &ctxt->eflags;
1950
ctxt->dst.bytes = ctxt->op_bytes;
1951
return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1952
}
1953
1954
static int em_enter(struct x86_emulate_ctxt *ctxt)
1955
{
1956
int rc;
1957
unsigned frame_size = ctxt->src.val;
1958
unsigned nesting_level = ctxt->src2.val & 31;
1959
ulong rbp;
1960
1961
if (nesting_level)
1962
return X86EMUL_UNHANDLEABLE;
1963
1964
rbp = reg_read(ctxt, VCPU_REGS_RBP);
1965
rc = emulate_push(ctxt, &rbp, stack_size(ctxt));
1966
if (rc != X86EMUL_CONTINUE)
1967
return rc;
1968
assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1969
stack_mask(ctxt));
1970
assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1971
reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1972
stack_mask(ctxt));
1973
return X86EMUL_CONTINUE;
1974
}
1975
1976
static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}
1982
1983
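/*
 * Push a segment register.  With a 32-bit operand size only the 16-bit
 * selector is actually written, but RSP is still decremented by four in
 * total (two bytes here plus two bytes in em_push()), leaving the upper
 * half of the stack slot untouched.
 */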
static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1984
{
1985
int seg = ctxt->src2.val;
1986
1987
ctxt->src.val = get_segment_selector(ctxt, seg);
1988
if (ctxt->op_bytes == 4) {
1989
rsp_increment(ctxt, -2);
1990
ctxt->op_bytes = 2;
1991
}
1992
1993
return em_push(ctxt);
1994
}
1995
1996
static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1997
{
1998
int seg = ctxt->src2.val;
1999
unsigned long selector = 0;
2000
int rc;
2001
2002
rc = emulate_pop(ctxt, &selector, 2);
2003
if (rc != X86EMUL_CONTINUE)
2004
return rc;
2005
2006
if (seg == VCPU_SREG_SS)
2007
ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
2008
if (ctxt->op_bytes > 2)
2009
rsp_increment(ctxt, ctxt->op_bytes - 2);
2010
2011
rc = load_segment_descriptor(ctxt, (u16)selector, seg);
2012
return rc;
2013
}
2014
2015
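/*
 * PUSHA: push AX, CX, DX, BX, SP, BP, SI and DI in that order; the value
 * pushed for SP is the one it held before the first push.
 */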
static int em_pusha(struct x86_emulate_ctxt *ctxt)
2016
{
2017
unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
2018
int rc = X86EMUL_CONTINUE;
2019
int reg = VCPU_REGS_RAX;
2020
2021
while (reg <= VCPU_REGS_RDI) {
2022
(reg == VCPU_REGS_RSP) ?
2023
(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
2024
2025
rc = em_push(ctxt);
2026
if (rc != X86EMUL_CONTINUE)
2027
return rc;
2028
2029
++reg;
2030
}
2031
2032
return rc;
2033
}
2034
2035
static int em_pushf(struct x86_emulate_ctxt *ctxt)
2036
{
2037
ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
2038
return em_push(ctxt);
2039
}
2040
2041
static int em_popa(struct x86_emulate_ctxt *ctxt)
2042
{
2043
int rc = X86EMUL_CONTINUE;
2044
int reg = VCPU_REGS_RDI;
2045
u32 val = 0;
2046
2047
while (reg >= VCPU_REGS_RAX) {
2048
if (reg == VCPU_REGS_RSP) {
2049
rsp_increment(ctxt, ctxt->op_bytes);
2050
--reg;
2051
}
2052
2053
rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
2054
if (rc != X86EMUL_CONTINUE)
2055
break;
2056
assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
2057
--reg;
2058
}
2059
return rc;
2060
}
2061
2062
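/*
 * Real-mode interrupt delivery: push FLAGS, CS and IP, clear IF/TF/AC,
 * then load CS:IP from the IVT entry at IDT base + irq * 4 (offset at
 * +0, segment at +2).
 */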
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2063
{
2064
const struct x86_emulate_ops *ops = ctxt->ops;
2065
int rc;
2066
struct desc_ptr dt;
2067
gva_t cs_addr;
2068
gva_t eip_addr;
2069
u16 cs, eip;
2070
2071
/* TODO: Add limit checks */
2072
ctxt->src.val = ctxt->eflags;
2073
rc = em_push(ctxt);
2074
if (rc != X86EMUL_CONTINUE)
2075
return rc;
2076
2077
ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2078
2079
ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2080
rc = em_push(ctxt);
2081
if (rc != X86EMUL_CONTINUE)
2082
return rc;
2083
2084
ctxt->src.val = ctxt->_eip;
2085
rc = em_push(ctxt);
2086
if (rc != X86EMUL_CONTINUE)
2087
return rc;
2088
2089
ops->get_idt(ctxt, &dt);
2090
2091
eip_addr = dt.address + (irq << 2);
2092
cs_addr = dt.address + (irq << 2) + 2;
2093
2094
rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2095
if (rc != X86EMUL_CONTINUE)
2096
return rc;
2097
2098
rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2099
if (rc != X86EMUL_CONTINUE)
2100
return rc;
2101
2102
rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2103
if (rc != X86EMUL_CONTINUE)
2104
return rc;
2105
2106
ctxt->_eip = eip;
2107
2108
return rc;
2109
}
2110
2111
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2112
{
2113
int rc;
2114
2115
invalidate_registers(ctxt);
2116
rc = __emulate_int_real(ctxt, irq);
2117
if (rc == X86EMUL_CONTINUE)
2118
writeback_registers(ctxt);
2119
return rc;
2120
}
2121
2122
static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2123
{
2124
switch(ctxt->mode) {
2125
case X86EMUL_MODE_REAL:
2126
return __emulate_int_real(ctxt, irq);
2127
case X86EMUL_MODE_VM86:
2128
case X86EMUL_MODE_PROT16:
2129
case X86EMUL_MODE_PROT32:
2130
case X86EMUL_MODE_PROT64:
2131
default:
2132
		/* Protected-mode interrupt delivery is not implemented yet. */
2133
return X86EMUL_UNHANDLEABLE;
2134
}
2135
}
2136
2137
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2138
{
2139
int rc = X86EMUL_CONTINUE;
2140
unsigned long temp_eip = 0;
2141
unsigned long temp_eflags = 0;
2142
unsigned long cs = 0;
2143
unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2144
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2145
X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2146
X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2147
X86_EFLAGS_AC | X86_EFLAGS_ID |
2148
X86_EFLAGS_FIXED;
2149
unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2150
X86_EFLAGS_VIP;
2151
2152
/* TODO: Add stack limit check */
2153
2154
rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2155
2156
if (rc != X86EMUL_CONTINUE)
2157
return rc;
2158
2159
if (temp_eip & ~0xffff)
2160
return emulate_gp(ctxt, 0);
2161
2162
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2163
2164
if (rc != X86EMUL_CONTINUE)
2165
return rc;
2166
2167
rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2168
2169
if (rc != X86EMUL_CONTINUE)
2170
return rc;
2171
2172
rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2173
2174
if (rc != X86EMUL_CONTINUE)
2175
return rc;
2176
2177
ctxt->_eip = temp_eip;
2178
2179
if (ctxt->op_bytes == 4)
2180
ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2181
else if (ctxt->op_bytes == 2) {
2182
ctxt->eflags &= ~0xffff;
2183
ctxt->eflags |= temp_eflags;
2184
}
2185
2186
ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2187
ctxt->eflags |= X86_EFLAGS_FIXED;
2188
ctxt->ops->set_nmi_mask(ctxt, false);
2189
2190
return rc;
2191
}
2192
2193
static int em_iret(struct x86_emulate_ctxt *ctxt)
2194
{
2195
switch(ctxt->mode) {
2196
case X86EMUL_MODE_REAL:
2197
return emulate_iret_real(ctxt);
2198
case X86EMUL_MODE_VM86:
2199
case X86EMUL_MODE_PROT16:
2200
case X86EMUL_MODE_PROT32:
2201
case X86EMUL_MODE_PROT64:
2202
default:
2203
		/* IRET from protected mode is not implemented yet. */
2204
return X86EMUL_UNHANDLEABLE;
2205
}
2206
}
2207
2208
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2209
{
2210
int rc;
2211
unsigned short sel;
2212
struct desc_struct new_desc;
2213
u8 cpl = ctxt->ops->cpl(ctxt);
2214
2215
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2216
2217
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2218
X86_TRANSFER_CALL_JMP,
2219
&new_desc);
2220
if (rc != X86EMUL_CONTINUE)
2221
return rc;
2222
2223
rc = assign_eip_far(ctxt, ctxt->src.val);
2224
/* Error handling is not implemented. */
2225
if (rc != X86EMUL_CONTINUE)
2226
return X86EMUL_UNHANDLEABLE;
2227
2228
return rc;
2229
}
2230
2231
static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
	return assign_eip_near(ctxt, ctxt->src.val);
}

static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long int old_eip;

	old_eip = ctxt->_eip;
	rc = assign_eip_near(ctxt, ctxt->src.val);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
	return rc;
}
2249
2250
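/*
 * CMPXCHG8B: compare EDX:EAX with the 64-bit destination.  On a mismatch
 * the destination is loaded into EDX:EAX and ZF is cleared; on a match
 * ECX:EBX is written back and ZF is set.  The 16-byte CMPXCHG16B form is
 * not handled by this helper.
 */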
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2251
{
2252
u64 old = ctxt->dst.orig_val64;
2253
2254
if (ctxt->dst.bytes == 16)
2255
return X86EMUL_UNHANDLEABLE;
2256
2257
if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2258
((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2259
*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2260
*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2261
ctxt->eflags &= ~X86_EFLAGS_ZF;
2262
} else {
2263
ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2264
(u32) reg_read(ctxt, VCPU_REGS_RBX);
2265
2266
ctxt->eflags |= X86_EFLAGS_ZF;
2267
}
2268
return X86EMUL_CONTINUE;
2269
}
2270
2271
static int em_ret(struct x86_emulate_ctxt *ctxt)
2272
{
2273
int rc;
2274
unsigned long eip = 0;
2275
2276
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2277
if (rc != X86EMUL_CONTINUE)
2278
return rc;
2279
2280
return assign_eip_near(ctxt, eip);
2281
}
2282
2283
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2284
{
2285
int rc;
2286
unsigned long eip = 0;
2287
unsigned long cs = 0;
2288
int cpl = ctxt->ops->cpl(ctxt);
2289
struct desc_struct new_desc;
2290
2291
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2292
if (rc != X86EMUL_CONTINUE)
2293
return rc;
2294
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2295
if (rc != X86EMUL_CONTINUE)
2296
return rc;
2297
rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2298
X86_TRANSFER_RET,
2299
&new_desc);
2300
if (rc != X86EMUL_CONTINUE)
2301
return rc;
2302
rc = assign_eip_far(ctxt, eip);
2303
/* Error handling is not implemented. */
2304
if (rc != X86EMUL_CONTINUE)
2305
return X86EMUL_UNHANDLEABLE;
2306
2307
return rc;
2308
}
2309
2310
static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2311
{
2312
int rc;
2313
2314
rc = em_ret_far(ctxt);
2315
if (rc != X86EMUL_CONTINUE)
2316
return rc;
2317
rsp_increment(ctxt, ctxt->src.val);
2318
return X86EMUL_CONTINUE;
2319
}
2320
2321
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2322
{
2323
/* Save real source value, then compare EAX against destination. */
2324
ctxt->dst.orig_val = ctxt->dst.val;
2325
ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2326
ctxt->src.orig_val = ctxt->src.val;
2327
ctxt->src.val = ctxt->dst.orig_val;
2328
fastop(ctxt, em_cmp);
2329
2330
if (ctxt->eflags & X86_EFLAGS_ZF) {
2331
/* Success: write back to memory; no update of EAX */
2332
ctxt->src.type = OP_NONE;
2333
ctxt->dst.val = ctxt->src.orig_val;
2334
} else {
2335
/* Failure: write the value we saw to EAX. */
2336
ctxt->src.type = OP_REG;
2337
ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2338
ctxt->src.val = ctxt->dst.orig_val;
2339
/* Create write-cycle to dest by writing the same value */
2340
ctxt->dst.val = ctxt->dst.orig_val;
2341
}
2342
return X86EMUL_CONTINUE;
2343
}
2344
2345
static int em_lseg(struct x86_emulate_ctxt *ctxt)
2346
{
2347
int seg = ctxt->src2.val;
2348
unsigned short sel;
2349
int rc;
2350
2351
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2352
2353
rc = load_segment_descriptor(ctxt, sel, seg);
2354
if (rc != X86EMUL_CONTINUE)
2355
return rc;
2356
2357
ctxt->dst.val = ctxt->src.val;
2358
return rc;
2359
}
2360
2361
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
	if (!ctxt->ops->is_smm(ctxt))
		return emulate_ud(ctxt);

	if (ctxt->ops->leave_smm(ctxt))
		ctxt->ops->triple_fault(ctxt);

	return emulator_recalc_and_set_mode(ctxt);
}
2371
2372
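/*
 * Build the flat code and stack segments that SYSCALL/SYSENTER/SYSEXIT
 * load implicitly: base 0, 4GiB limit, present, DPL 0.  CS.L/CS.D and
 * the DPLs are adjusted afterwards by the individual callers.
 */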
static void
2373
setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
2374
{
2375
cs->l = 0; /* will be adjusted later */
2376
set_desc_base(cs, 0); /* flat segment */
2377
cs->g = 1; /* 4kb granularity */
2378
set_desc_limit(cs, 0xfffff); /* 4GB limit */
2379
cs->type = 0x0b; /* Read, Execute, Accessed */
2380
cs->s = 1;
2381
cs->dpl = 0; /* will be adjusted later */
2382
cs->p = 1;
2383
cs->d = 1;
2384
cs->avl = 0;
2385
2386
set_desc_base(ss, 0); /* flat segment */
2387
set_desc_limit(ss, 0xfffff); /* 4GB limit */
2388
ss->g = 1; /* 4kb granularity */
2389
ss->s = 1;
2390
ss->type = 0x03; /* Read/Write, Accessed */
2391
ss->d = 1; /* 32bit stack segment */
2392
ss->dpl = 0;
2393
ss->p = 1;
2394
ss->l = 0;
2395
ss->avl = 0;
2396
}
2397
2398
static int em_syscall(struct x86_emulate_ctxt *ctxt)
2399
{
2400
const struct x86_emulate_ops *ops = ctxt->ops;
2401
struct desc_struct cs, ss;
2402
u64 msr_data;
2403
u16 cs_sel, ss_sel;
2404
u64 efer = 0;
2405
2406
/* syscall is not available in real mode */
2407
if (ctxt->mode == X86EMUL_MODE_REAL ||
2408
ctxt->mode == X86EMUL_MODE_VM86)
2409
return emulate_ud(ctxt);
2410
2411
/*
2412
* Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas
2413
* AMD allows SYSCALL in any flavor of protected mode. Note, it's
2414
* infeasible to emulate Intel behavior when running on AMD hardware,
2415
* as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD
2416
* for KVM to trap-and-emulate, unlike emulating AMD on Intel.
2417
*/
2418
if (ctxt->mode != X86EMUL_MODE_PROT64 &&
2419
ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
2420
return emulate_ud(ctxt);
2421
2422
ops->get_msr(ctxt, MSR_EFER, &efer);
2423
if (!(efer & EFER_SCE))
2424
return emulate_ud(ctxt);
2425
2426
setup_syscalls_segments(&cs, &ss);
2427
ops->get_msr(ctxt, MSR_STAR, &msr_data);
2428
msr_data >>= 32;
2429
cs_sel = (u16)(msr_data & 0xfffc);
2430
ss_sel = (u16)(msr_data + 8);
2431
2432
if (efer & EFER_LMA) {
2433
cs.d = 0;
2434
cs.l = 1;
2435
}
2436
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2437
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2438
2439
*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2440
if (efer & EFER_LMA) {
2441
#ifdef CONFIG_X86_64
2442
*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2443
2444
ops->get_msr(ctxt,
2445
ctxt->mode == X86EMUL_MODE_PROT64 ?
2446
MSR_LSTAR : MSR_CSTAR, &msr_data);
2447
ctxt->_eip = msr_data;
2448
2449
ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2450
ctxt->eflags &= ~msr_data;
2451
ctxt->eflags |= X86_EFLAGS_FIXED;
2452
#endif
2453
} else {
2454
/* legacy mode */
2455
ops->get_msr(ctxt, MSR_STAR, &msr_data);
2456
ctxt->_eip = (u32)msr_data;
2457
2458
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2459
}
2460
2461
ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2462
return X86EMUL_CONTINUE;
2463
}
2464
2465
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2466
{
2467
const struct x86_emulate_ops *ops = ctxt->ops;
2468
struct desc_struct cs, ss;
2469
u64 msr_data;
2470
u16 cs_sel, ss_sel;
2471
u64 efer = 0;
2472
2473
ops->get_msr(ctxt, MSR_EFER, &efer);
2474
/* inject #GP if in real mode */
2475
if (ctxt->mode == X86EMUL_MODE_REAL)
2476
return emulate_gp(ctxt, 0);
2477
2478
/*
2479
* Intel's architecture allows SYSENTER in compatibility mode, but AMD
2480
* does not. Note, AMD does allow SYSENTER in legacy protected mode.
2481
*/
2482
if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) &&
2483
!ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
2484
return emulate_ud(ctxt);
2485
2486
/* sysenter/sysexit have not been tested in 64bit mode. */
2487
if (ctxt->mode == X86EMUL_MODE_PROT64)
2488
return X86EMUL_UNHANDLEABLE;
2489
2490
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2491
if ((msr_data & 0xfffc) == 0x0)
2492
return emulate_gp(ctxt, 0);
2493
2494
setup_syscalls_segments(&cs, &ss);
2495
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2496
cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2497
ss_sel = cs_sel + 8;
2498
if (efer & EFER_LMA) {
2499
cs.d = 0;
2500
cs.l = 1;
2501
}
2502
2503
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2504
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2505
2506
ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2507
ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2508
2509
ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2510
*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2511
(u32)msr_data;
2512
if (efer & EFER_LMA)
2513
ctxt->mode = X86EMUL_MODE_PROT64;
2514
2515
return X86EMUL_CONTINUE;
2516
}
2517
2518
static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2519
{
2520
const struct x86_emulate_ops *ops = ctxt->ops;
2521
struct desc_struct cs, ss;
2522
u64 msr_data, rcx, rdx;
2523
int usermode;
2524
u16 cs_sel = 0, ss_sel = 0;
2525
2526
/* inject #GP if in real mode or Virtual 8086 mode */
2527
if (ctxt->mode == X86EMUL_MODE_REAL ||
2528
ctxt->mode == X86EMUL_MODE_VM86)
2529
return emulate_gp(ctxt, 0);
2530
2531
setup_syscalls_segments(&cs, &ss);
2532
2533
if ((ctxt->rex_prefix & 0x8) != 0x0)
2534
usermode = X86EMUL_MODE_PROT64;
2535
else
2536
usermode = X86EMUL_MODE_PROT32;
2537
2538
rcx = reg_read(ctxt, VCPU_REGS_RCX);
2539
rdx = reg_read(ctxt, VCPU_REGS_RDX);
2540
2541
cs.dpl = 3;
2542
ss.dpl = 3;
2543
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2544
switch (usermode) {
2545
case X86EMUL_MODE_PROT32:
2546
cs_sel = (u16)(msr_data + 16);
2547
if ((msr_data & 0xfffc) == 0x0)
2548
return emulate_gp(ctxt, 0);
2549
ss_sel = (u16)(msr_data + 24);
2550
rcx = (u32)rcx;
2551
rdx = (u32)rdx;
2552
break;
2553
case X86EMUL_MODE_PROT64:
2554
cs_sel = (u16)(msr_data + 32);
2555
if (msr_data == 0x0)
2556
return emulate_gp(ctxt, 0);
2557
ss_sel = cs_sel + 8;
2558
cs.d = 0;
2559
cs.l = 1;
2560
if (emul_is_noncanonical_address(rcx, ctxt, 0) ||
2561
emul_is_noncanonical_address(rdx, ctxt, 0))
2562
return emulate_gp(ctxt, 0);
2563
break;
2564
}
2565
cs_sel |= SEGMENT_RPL_MASK;
2566
ss_sel |= SEGMENT_RPL_MASK;
2567
2568
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2569
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2570
2571
ctxt->_eip = rdx;
2572
ctxt->mode = usermode;
2573
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2574
2575
return X86EMUL_CONTINUE;
2576
}
2577
2578
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2579
{
2580
int iopl;
2581
if (ctxt->mode == X86EMUL_MODE_REAL)
2582
return false;
2583
if (ctxt->mode == X86EMUL_MODE_VM86)
2584
return true;
2585
iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2586
return ctxt->ops->cpl(ctxt) > iopl;
2587
}
2588
2589
#define VMWARE_PORT_VMPORT (0x5658)
2590
#define VMWARE_PORT_VMRPC (0x5659)
2591
2592
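/*
 * Consult the I/O permission bitmap in the TSS: TR must be present with a
 * segment limit of at least 103, the bitmap offset is read from TSS byte
 * 102, and the access is allowed only if every bit covering the accessed
 * ports is clear.
 */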
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2593
u16 port, u16 len)
2594
{
2595
const struct x86_emulate_ops *ops = ctxt->ops;
2596
struct desc_struct tr_seg;
2597
u32 base3;
2598
int r;
2599
u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2600
unsigned mask = (1 << len) - 1;
2601
unsigned long base;
2602
2603
/*
2604
* VMware allows access to these ports even if denied
2605
* by TSS I/O permission bitmap. Mimic behavior.
2606
*/
2607
if (enable_vmware_backdoor &&
2608
((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2609
return true;
2610
2611
ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2612
if (!tr_seg.p)
2613
return false;
2614
if (desc_limit_scaled(&tr_seg) < 103)
2615
return false;
2616
base = get_desc_base(&tr_seg);
2617
#ifdef CONFIG_X86_64
2618
base |= ((u64)base3) << 32;
2619
#endif
2620
r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2621
if (r != X86EMUL_CONTINUE)
2622
return false;
2623
if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2624
return false;
2625
r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2626
if (r != X86EMUL_CONTINUE)
2627
return false;
2628
if ((perm >> bit_idx) & mask)
2629
return false;
2630
return true;
2631
}
2632
2633
static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2634
u16 port, u16 len)
2635
{
2636
if (ctxt->perm_ok)
2637
return true;
2638
2639
if (emulator_bad_iopl(ctxt))
2640
if (!emulator_io_port_access_allowed(ctxt, port, len))
2641
return false;
2642
2643
ctxt->perm_ok = true;
2644
2645
return true;
2646
}
2647
2648
static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2649
{
2650
/*
2651
	 * Intel CPUs mask the counter and pointers in a quite strange
	 * manner when ECX is zero due to REP-string optimizations.
2653
*/
2654
#ifdef CONFIG_X86_64
2655
u32 eax, ebx, ecx, edx;
2656
2657
if (ctxt->ad_bytes != 4)
2658
return;
2659
2660
eax = ecx = 0;
2661
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2662
if (!is_guest_vendor_intel(ebx, ecx, edx))
2663
return;
2664
2665
*reg_write(ctxt, VCPU_REGS_RCX) = 0;
2666
2667
switch (ctxt->b) {
2668
case 0xa4: /* movsb */
2669
case 0xa5: /* movsd/w */
2670
*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2671
fallthrough;
2672
case 0xaa: /* stosb */
2673
case 0xab: /* stosd/w */
2674
*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2675
}
2676
#endif
2677
}
2678
2679
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2680
struct tss_segment_16 *tss)
2681
{
2682
tss->ip = ctxt->_eip;
2683
tss->flag = ctxt->eflags;
2684
tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2685
tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2686
tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2687
tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2688
tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2689
tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2690
tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2691
tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2692
2693
tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2694
tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2695
tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2696
tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2697
tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2698
}
2699
2700
static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2701
struct tss_segment_16 *tss)
2702
{
2703
int ret;
2704
u8 cpl;
2705
2706
ctxt->_eip = tss->ip;
2707
ctxt->eflags = tss->flag | 2;
2708
*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2709
*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2710
*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2711
*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2712
*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2713
*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2714
*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2715
*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2716
2717
/*
2718
* SDM says that segment selectors are loaded before segment
2719
* descriptors
2720
*/
2721
set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2722
set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2723
set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2724
set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2725
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2726
2727
cpl = tss->cs & 3;
2728
2729
/*
2730
	 * Now load the segment descriptors. If a fault happens at this
	 * stage, it is handled in the context of the new task.
2732
*/
2733
ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2734
X86_TRANSFER_TASK_SWITCH, NULL);
2735
if (ret != X86EMUL_CONTINUE)
2736
return ret;
2737
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2738
X86_TRANSFER_TASK_SWITCH, NULL);
2739
if (ret != X86EMUL_CONTINUE)
2740
return ret;
2741
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2742
X86_TRANSFER_TASK_SWITCH, NULL);
2743
if (ret != X86EMUL_CONTINUE)
2744
return ret;
2745
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2746
X86_TRANSFER_TASK_SWITCH, NULL);
2747
if (ret != X86EMUL_CONTINUE)
2748
return ret;
2749
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2750
X86_TRANSFER_TASK_SWITCH, NULL);
2751
if (ret != X86EMUL_CONTINUE)
2752
return ret;
2753
2754
return X86EMUL_CONTINUE;
2755
}
2756
2757
static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2758
ulong old_tss_base, struct desc_struct *new_desc)
2759
{
2760
struct tss_segment_16 tss_seg;
2761
int ret;
2762
u32 new_tss_base = get_desc_base(new_desc);
2763
2764
ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2765
if (ret != X86EMUL_CONTINUE)
2766
return ret;
2767
2768
save_state_to_tss16(ctxt, &tss_seg);
2769
2770
ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2771
if (ret != X86EMUL_CONTINUE)
2772
return ret;
2773
2774
ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2775
if (ret != X86EMUL_CONTINUE)
2776
return ret;
2777
2778
if (old_tss_sel != 0xffff) {
2779
tss_seg.prev_task_link = old_tss_sel;
2780
2781
ret = linear_write_system(ctxt, new_tss_base,
2782
&tss_seg.prev_task_link,
2783
sizeof(tss_seg.prev_task_link));
2784
if (ret != X86EMUL_CONTINUE)
2785
return ret;
2786
}
2787
2788
return load_state_from_tss16(ctxt, &tss_seg);
2789
}
2790
2791
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2792
struct tss_segment_32 *tss)
2793
{
2794
/* CR3 and ldt selector are not saved intentionally */
2795
tss->eip = ctxt->_eip;
2796
tss->eflags = ctxt->eflags;
2797
tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2798
tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2799
tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2800
tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2801
tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2802
tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2803
tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2804
tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2805
2806
tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2807
tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2808
tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2809
tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2810
tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2811
tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2812
}
2813
2814
static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2815
struct tss_segment_32 *tss)
2816
{
2817
int ret;
2818
u8 cpl;
2819
2820
if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2821
return emulate_gp(ctxt, 0);
2822
ctxt->_eip = tss->eip;
2823
ctxt->eflags = tss->eflags | 2;
2824
2825
/* General purpose registers */
2826
*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2827
*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2828
*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2829
*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2830
*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2831
*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2832
*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2833
*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2834
2835
/*
2836
* SDM says that segment selectors are loaded before segment
2837
* descriptors. This is important because CPL checks will
2838
* use CS.RPL.
2839
*/
2840
set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2841
set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2842
set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2843
set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2844
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2845
set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2846
set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2847
2848
/*
2849
* If we're switching between Protected Mode and VM86, we need to make
2850
* sure to update the mode before loading the segment descriptors so
2851
* that the selectors are interpreted correctly.
2852
*/
2853
if (ctxt->eflags & X86_EFLAGS_VM) {
2854
ctxt->mode = X86EMUL_MODE_VM86;
2855
cpl = 3;
2856
} else {
2857
ctxt->mode = X86EMUL_MODE_PROT32;
2858
cpl = tss->cs & 3;
2859
}
2860
2861
/*
2862
	 * Now load the segment descriptors. If a fault happens at this
	 * stage, it is handled in the context of the new task.
2864
*/
2865
ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2866
cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2867
if (ret != X86EMUL_CONTINUE)
2868
return ret;
2869
ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2870
X86_TRANSFER_TASK_SWITCH, NULL);
2871
if (ret != X86EMUL_CONTINUE)
2872
return ret;
2873
ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2874
X86_TRANSFER_TASK_SWITCH, NULL);
2875
if (ret != X86EMUL_CONTINUE)
2876
return ret;
2877
ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2878
X86_TRANSFER_TASK_SWITCH, NULL);
2879
if (ret != X86EMUL_CONTINUE)
2880
return ret;
2881
ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2882
X86_TRANSFER_TASK_SWITCH, NULL);
2883
if (ret != X86EMUL_CONTINUE)
2884
return ret;
2885
ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2886
X86_TRANSFER_TASK_SWITCH, NULL);
2887
if (ret != X86EMUL_CONTINUE)
2888
return ret;
2889
ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2890
X86_TRANSFER_TASK_SWITCH, NULL);
2891
2892
return ret;
2893
}
2894
2895
static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2896
ulong old_tss_base, struct desc_struct *new_desc)
2897
{
2898
struct tss_segment_32 tss_seg;
2899
int ret;
2900
u32 new_tss_base = get_desc_base(new_desc);
2901
u32 eip_offset = offsetof(struct tss_segment_32, eip);
2902
u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2903
2904
ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2905
if (ret != X86EMUL_CONTINUE)
2906
return ret;
2907
2908
save_state_to_tss32(ctxt, &tss_seg);
2909
2910
/* Only GP registers and segment selectors are saved */
2911
ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2912
ldt_sel_offset - eip_offset);
2913
if (ret != X86EMUL_CONTINUE)
2914
return ret;
2915
2916
ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2917
if (ret != X86EMUL_CONTINUE)
2918
return ret;
2919
2920
if (old_tss_sel != 0xffff) {
2921
tss_seg.prev_task_link = old_tss_sel;
2922
2923
ret = linear_write_system(ctxt, new_tss_base,
2924
&tss_seg.prev_task_link,
2925
sizeof(tss_seg.prev_task_link));
2926
if (ret != X86EMUL_CONTINUE)
2927
return ret;
2928
}
2929
2930
return load_state_from_tss32(ctxt, &tss_seg);
2931
}
2932
2933
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2934
u16 tss_selector, int idt_index, int reason,
2935
bool has_error_code, u32 error_code)
2936
{
2937
const struct x86_emulate_ops *ops = ctxt->ops;
2938
struct desc_struct curr_tss_desc, next_tss_desc;
2939
int ret;
2940
u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2941
ulong old_tss_base =
2942
ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2943
u32 desc_limit;
2944
ulong desc_addr, dr7;
2945
2946
/* FIXME: old_tss_base == ~0 ? */
2947
2948
ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2949
if (ret != X86EMUL_CONTINUE)
2950
return ret;
2951
ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2952
if (ret != X86EMUL_CONTINUE)
2953
return ret;
2954
2955
/* FIXME: check that next_tss_desc is tss */
2956
2957
/*
2958
* Check privileges. The three cases are task switch caused by...
2959
*
2960
* 1. jmp/call/int to task gate: Check against DPL of the task gate
2961
* 2. Exception/IRQ/iret: No check is performed
2962
* 3. jmp/call to TSS/task-gate: No check is performed since the
2963
* hardware checks it before exiting.
2964
*/
2965
if (reason == TASK_SWITCH_GATE) {
2966
if (idt_index != -1) {
2967
/* Software interrupts */
2968
struct desc_struct task_gate_desc;
2969
int dpl;
2970
2971
ret = read_interrupt_descriptor(ctxt, idt_index,
2972
&task_gate_desc);
2973
if (ret != X86EMUL_CONTINUE)
2974
return ret;
2975
2976
dpl = task_gate_desc.dpl;
2977
if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2978
return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2979
}
2980
}
2981
2982
desc_limit = desc_limit_scaled(&next_tss_desc);
2983
if (!next_tss_desc.p ||
2984
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2985
desc_limit < 0x2b)) {
2986
return emulate_ts(ctxt, tss_selector & 0xfffc);
2987
}
2988
2989
if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2990
curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2991
write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2992
}
2993
2994
if (reason == TASK_SWITCH_IRET)
2995
ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2996
2997
/* set back link to prev task only if NT bit is set in eflags
2998
note that old_tss_sel is not used after this point */
2999
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3000
old_tss_sel = 0xffff;
3001
3002
if (next_tss_desc.type & 8)
3003
ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
3004
else
3005
ret = task_switch_16(ctxt, old_tss_sel,
3006
old_tss_base, &next_tss_desc);
3007
if (ret != X86EMUL_CONTINUE)
3008
return ret;
3009
3010
if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3011
ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3012
3013
if (reason != TASK_SWITCH_IRET) {
3014
next_tss_desc.type |= (1 << 1); /* set busy flag */
3015
write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3016
}
3017
3018
ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3019
ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3020
3021
if (has_error_code) {
3022
ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3023
ctxt->lock_prefix = 0;
3024
ctxt->src.val = (unsigned long) error_code;
3025
ret = em_push(ctxt);
3026
}
3027
3028
dr7 = ops->get_dr(ctxt, 7);
3029
ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3030
3031
return ret;
3032
}
3033
3034
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3035
u16 tss_selector, int idt_index, int reason,
3036
bool has_error_code, u32 error_code)
3037
{
3038
int rc;
3039
3040
invalidate_registers(ctxt);
3041
ctxt->_eip = ctxt->eip;
3042
ctxt->dst.type = OP_NONE;
3043
3044
rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3045
has_error_code, error_code);
3046
3047
if (rc == X86EMUL_CONTINUE) {
3048
ctxt->eip = ctxt->_eip;
3049
writeback_registers(ctxt);
3050
}
3051
3052
return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3053
}
3054
3055
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
			    struct operand *op)
{
	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;

	register_address_increment(ctxt, reg, df * op->bytes);
	op->addr.mem.ea = register_address(ctxt, reg);
}
3063
3064
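/*
 * DAS: decimal adjust AL after subtraction, so that AL holds a valid
 * packed BCD result.  CF and AF are recomputed here; PF, ZF and SF are
 * derived by OR-ing AL with an immediate zero via the fastop path.
 */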
static int em_das(struct x86_emulate_ctxt *ctxt)
3065
{
3066
u8 al, old_al;
3067
bool af, cf, old_cf;
3068
3069
cf = ctxt->eflags & X86_EFLAGS_CF;
3070
al = ctxt->dst.val;
3071
3072
old_al = al;
3073
old_cf = cf;
3074
cf = false;
3075
af = ctxt->eflags & X86_EFLAGS_AF;
3076
if ((al & 0x0f) > 9 || af) {
3077
al -= 6;
3078
cf = old_cf | (al >= 250);
3079
af = true;
3080
} else {
3081
af = false;
3082
}
3083
if (old_al > 0x99 || old_cf) {
3084
al -= 0x60;
3085
cf = true;
3086
}
3087
3088
ctxt->dst.val = al;
3089
/* Set PF, ZF, SF */
3090
ctxt->src.type = OP_IMM;
3091
ctxt->src.val = 0;
3092
ctxt->src.bytes = 1;
3093
fastop(ctxt, em_or);
3094
ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3095
if (cf)
3096
ctxt->eflags |= X86_EFLAGS_CF;
3097
if (af)
3098
ctxt->eflags |= X86_EFLAGS_AF;
3099
return X86EMUL_CONTINUE;
3100
}
3101
3102
static int em_aam(struct x86_emulate_ctxt *ctxt)
3103
{
3104
u8 al, ah;
3105
3106
if (ctxt->src.val == 0)
3107
return emulate_de(ctxt);
3108
3109
al = ctxt->dst.val & 0xff;
3110
ah = al / ctxt->src.val;
3111
al %= ctxt->src.val;
3112
3113
ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3114
3115
/* Set PF, ZF, SF */
3116
ctxt->src.type = OP_IMM;
3117
ctxt->src.val = 0;
3118
ctxt->src.bytes = 1;
3119
fastop(ctxt, em_or);
3120
3121
return X86EMUL_CONTINUE;
3122
}
3123
3124
static int em_aad(struct x86_emulate_ctxt *ctxt)
3125
{
3126
u8 al = ctxt->dst.val & 0xff;
3127
u8 ah = (ctxt->dst.val >> 8) & 0xff;
3128
3129
al = (al + (ah * ctxt->src.val)) & 0xff;
3130
3131
ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3132
3133
/* Set PF, ZF, SF */
3134
ctxt->src.type = OP_IMM;
3135
ctxt->src.val = 0;
3136
ctxt->src.bytes = 1;
3137
fastop(ctxt, em_or);
3138
3139
return X86EMUL_CONTINUE;
3140
}
3141
3142
static int em_call(struct x86_emulate_ctxt *ctxt)
3143
{
3144
int rc;
3145
long rel = ctxt->src.val;
3146
3147
ctxt->src.val = (unsigned long)ctxt->_eip;
3148
rc = jmp_rel(ctxt, rel);
3149
if (rc != X86EMUL_CONTINUE)
3150
return rc;
3151
return em_push(ctxt);
3152
}
3153
3154
static int em_call_far(struct x86_emulate_ctxt *ctxt)
3155
{
3156
u16 sel, old_cs;
3157
ulong old_eip;
3158
int rc;
3159
struct desc_struct old_desc, new_desc;
3160
const struct x86_emulate_ops *ops = ctxt->ops;
3161
int cpl = ctxt->ops->cpl(ctxt);
3162
enum x86emul_mode prev_mode = ctxt->mode;
3163
3164
old_eip = ctxt->_eip;
3165
ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3166
3167
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3168
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3169
X86_TRANSFER_CALL_JMP, &new_desc);
3170
if (rc != X86EMUL_CONTINUE)
3171
return rc;
3172
3173
rc = assign_eip_far(ctxt, ctxt->src.val);
3174
if (rc != X86EMUL_CONTINUE)
3175
goto fail;
3176
3177
ctxt->src.val = old_cs;
3178
rc = em_push(ctxt);
3179
if (rc != X86EMUL_CONTINUE)
3180
goto fail;
3181
3182
ctxt->src.val = old_eip;
3183
rc = em_push(ctxt);
3184
	/* If we failed, we tainted the memory, but at the very least we
	   should restore cs */
3186
if (rc != X86EMUL_CONTINUE) {
3187
pr_warn_once("faulting far call emulation tainted memory\n");
3188
goto fail;
3189
}
3190
return rc;
3191
fail:
3192
ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3193
ctxt->mode = prev_mode;
3194
return rc;
3195
3196
}
3197
3198
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3199
{
3200
int rc;
3201
unsigned long eip = 0;
3202
3203
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3204
if (rc != X86EMUL_CONTINUE)
3205
return rc;
3206
rc = assign_eip_near(ctxt, eip);
3207
if (rc != X86EMUL_CONTINUE)
3208
return rc;
3209
rsp_increment(ctxt, ctxt->src.val);
3210
return X86EMUL_CONTINUE;
3211
}
3212
3213
static int em_xchg(struct x86_emulate_ctxt *ctxt)
3214
{
3215
/* Write back the register source. */
3216
ctxt->src.val = ctxt->dst.val;
3217
write_register_operand(&ctxt->src);
3218
3219
/* Write back the memory destination with implicit LOCK prefix. */
3220
ctxt->dst.val = ctxt->src.orig_val;
3221
ctxt->lock_prefix = 1;
3222
return X86EMUL_CONTINUE;
3223
}
3224
3225
static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3226
{
3227
ctxt->dst.val = ctxt->src2.val;
3228
return fastop(ctxt, em_imul);
3229
}
3230
3231
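/*
 * CWD/CDQ/CQO: sign-extend the accumulator into DX/EDX/RDX by replicating
 * its sign bit across the destination.
 */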
static int em_cwd(struct x86_emulate_ctxt *ctxt)
3232
{
3233
ctxt->dst.type = OP_REG;
3234
ctxt->dst.bytes = ctxt->src.bytes;
3235
ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3236
ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3237
3238
return X86EMUL_CONTINUE;
3239
}
3240
3241
static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3242
{
3243
u64 tsc_aux = 0;
3244
3245
if (!ctxt->ops->guest_has_rdpid(ctxt))
3246
return emulate_ud(ctxt);
3247
3248
ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3249
ctxt->dst.val = tsc_aux;
3250
return X86EMUL_CONTINUE;
3251
}
3252
3253
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc = 0;

	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
	return X86EMUL_CONTINUE;
}

static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 pmc;

	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
		return emulate_gp(ctxt, 0);
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
	return X86EMUL_CONTINUE;
}
3273
3274
static int em_mov(struct x86_emulate_ctxt *ctxt)
3275
{
3276
memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3277
return X86EMUL_CONTINUE;
3278
}
3279
3280
static int em_movbe(struct x86_emulate_ctxt *ctxt)
3281
{
3282
u16 tmp;
3283
3284
if (!ctxt->ops->guest_has_movbe(ctxt))
3285
return emulate_ud(ctxt);
3286
3287
switch (ctxt->op_bytes) {
3288
case 2:
3289
/*
3290
* From MOVBE definition: "...When the operand size is 16 bits,
3291
* the upper word of the destination register remains unchanged
3292
* ..."
3293
*
3294
* Both casting ->valptr and ->val to u16 breaks strict aliasing
3295
	 * rules, so we have to do the operation almost by hand.
3296
*/
3297
tmp = (u16)ctxt->src.val;
3298
ctxt->dst.val &= ~0xffffUL;
3299
ctxt->dst.val |= (unsigned long)swab16(tmp);
3300
break;
3301
case 4:
3302
ctxt->dst.val = swab32((u32)ctxt->src.val);
3303
break;
3304
case 8:
3305
ctxt->dst.val = swab64(ctxt->src.val);
3306
break;
3307
default:
3308
BUG();
3309
}
3310
return X86EMUL_CONTINUE;
3311
}
3312
3313
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3314
{
3315
int cr_num = ctxt->modrm_reg;
3316
int r;
3317
3318
if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3319
return emulate_gp(ctxt, 0);
3320
3321
/* Disable writeback. */
3322
ctxt->dst.type = OP_NONE;
3323
3324
if (cr_num == 0) {
3325
/*
3326
* CR0 write might have updated CR0.PE and/or CR0.PG
3327
* which can affect the cpu's execution mode.
3328
*/
3329
r = emulator_recalc_and_set_mode(ctxt);
3330
if (r != X86EMUL_CONTINUE)
3331
return r;
3332
}
3333
3334
return X86EMUL_CONTINUE;
3335
}
3336
3337
static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3338
{
3339
unsigned long val;
3340
3341
if (ctxt->mode == X86EMUL_MODE_PROT64)
3342
val = ctxt->src.val & ~0ULL;
3343
else
3344
val = ctxt->src.val & ~0U;
3345
3346
/* #UD condition is already handled. */
3347
if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3348
return emulate_gp(ctxt, 0);
3349
3350
/* Disable writeback. */
3351
ctxt->dst.type = OP_NONE;
3352
return X86EMUL_CONTINUE;
3353
}
3354
3355
static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3356
{
3357
u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3358
u64 msr_data;
3359
int r;
3360
3361
msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3362
| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3363
r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3364
3365
if (r == X86EMUL_PROPAGATE_FAULT)
3366
return emulate_gp(ctxt, 0);
3367
3368
return r;
3369
}
3370
3371
static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3372
{
3373
u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3374
u64 msr_data;
3375
int r;
3376
3377
r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3378
3379
if (r == X86EMUL_PROPAGATE_FAULT)
3380
return emulate_gp(ctxt, 0);
3381
3382
if (r == X86EMUL_CONTINUE) {
3383
*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3384
*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3385
}
3386
return r;
3387
}
3388
3389
static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3390
{
3391
if (segment > VCPU_SREG_GS &&
3392
(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3393
ctxt->ops->cpl(ctxt) > 0)
3394
return emulate_gp(ctxt, 0);
3395
3396
ctxt->dst.val = get_segment_selector(ctxt, segment);
3397
if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3398
ctxt->dst.bytes = 2;
3399
return X86EMUL_CONTINUE;
3400
}
3401
3402
static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3403
{
3404
if (ctxt->modrm_reg > VCPU_SREG_GS)
3405
return emulate_ud(ctxt);
3406
3407
return em_store_sreg(ctxt, ctxt->modrm_reg);
3408
}
3409
3410
static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3411
{
3412
u16 sel = ctxt->src.val;
3413
3414
if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3415
return emulate_ud(ctxt);
3416
3417
if (ctxt->modrm_reg == VCPU_SREG_SS)
3418
ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3419
3420
/* Disable writeback. */
3421
ctxt->dst.type = OP_NONE;
3422
return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3423
}
3424
3425
static int em_sldt(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_LDTR);
}

static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}

static int em_str(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_TR);
}

static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
}
3452
3453
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3454
{
3455
int rc;
3456
ulong linear;
3457
unsigned int max_size;
3458
3459
rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
3460
&linear, X86EMUL_F_INVLPG);
3461
if (rc == X86EMUL_CONTINUE)
3462
ctxt->ops->invlpg(ctxt, linear);
3463
/* Disable writeback. */
3464
ctxt->dst.type = OP_NONE;
3465
return X86EMUL_CONTINUE;
3466
}
3467
3468
static int em_clts(struct x86_emulate_ctxt *ctxt)
3469
{
3470
ulong cr0;
3471
3472
cr0 = ctxt->ops->get_cr(ctxt, 0);
3473
cr0 &= ~X86_CR0_TS;
3474
ctxt->ops->set_cr(ctxt, 0, cr0);
3475
return X86EMUL_CONTINUE;
3476
}
3477
3478
static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3479
{
3480
int rc = ctxt->ops->fix_hypercall(ctxt);
3481
3482
if (rc != X86EMUL_CONTINUE)
3483
return rc;
3484
3485
/* Let the processor re-execute the fixed hypercall */
3486
ctxt->_eip = ctxt->eip;
3487
/* Disable writeback. */
3488
ctxt->dst.type = OP_NONE;
3489
return X86EMUL_CONTINUE;
3490
}
3491
3492
static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3493
void (*get)(struct x86_emulate_ctxt *ctxt,
3494
struct desc_ptr *ptr))
3495
{
3496
struct desc_ptr desc_ptr;
3497
3498
if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3499
ctxt->ops->cpl(ctxt) > 0)
3500
return emulate_gp(ctxt, 0);
3501
3502
if (ctxt->mode == X86EMUL_MODE_PROT64)
3503
ctxt->op_bytes = 8;
3504
get(ctxt, &desc_ptr);
3505
if (ctxt->op_bytes == 2) {
3506
ctxt->op_bytes = 4;
3507
desc_ptr.address &= 0x00ffffff;
3508
}
3509
/* Disable writeback. */
3510
ctxt->dst.type = OP_NONE;
3511
return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3512
&desc_ptr, 2 + ctxt->op_bytes);
3513
}
3514
3515
static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3516
{
3517
return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3518
}
3519
3520
static int em_sidt(struct x86_emulate_ctxt *ctxt)
3521
{
3522
return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3523
}
3524
3525
static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3526
{
3527
struct desc_ptr desc_ptr;
3528
int rc;
3529
3530
if (ctxt->mode == X86EMUL_MODE_PROT64)
3531
ctxt->op_bytes = 8;
3532
rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3533
&desc_ptr.size, &desc_ptr.address,
3534
ctxt->op_bytes);
3535
if (rc != X86EMUL_CONTINUE)
3536
return rc;
3537
if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3538
emul_is_noncanonical_address(desc_ptr.address, ctxt,
3539
X86EMUL_F_DT_LOAD))
3540
return emulate_gp(ctxt, 0);
3541
if (lgdt)
3542
ctxt->ops->set_gdt(ctxt, &desc_ptr);
3543
else
3544
ctxt->ops->set_idt(ctxt, &desc_ptr);
3545
/* Disable writeback. */
3546
ctxt->dst.type = OP_NONE;
3547
return X86EMUL_CONTINUE;
3548
}
3549
3550
static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3551
{
3552
return em_lgdt_lidt(ctxt, true);
3553
}
3554
3555
static int em_lidt(struct x86_emulate_ctxt *ctxt)
3556
{
3557
return em_lgdt_lidt(ctxt, false);
3558
}
3559
3560
static int em_smsw(struct x86_emulate_ctxt *ctxt)
3561
{
3562
if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3563
ctxt->ops->cpl(ctxt) > 0)
3564
return emulate_gp(ctxt, 0);
3565
3566
if (ctxt->dst.type == OP_MEM)
3567
ctxt->dst.bytes = 2;
3568
ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3569
return X86EMUL_CONTINUE;
3570
}
3571
3572
static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3573
{
3574
ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3575
| (ctxt->src.val & 0x0f));
3576
ctxt->dst.type = OP_NONE;
3577
return X86EMUL_CONTINUE;
3578
}
3579
3580
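/*
 * LOOP/LOOPE/LOOPNE: decrement (E)CX and branch while it is non-zero; for
 * opcodes 0xe0/0xe1 the ZF condition must also hold (the b ^ 0x5 trick
 * maps the opcode to the matching condition code for test_cc()).
 */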
static int em_loop(struct x86_emulate_ctxt *ctxt)
3581
{
3582
int rc = X86EMUL_CONTINUE;
3583
3584
register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3585
if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3586
(ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3587
rc = jmp_rel(ctxt, ctxt->src.val);
3588
3589
return rc;
3590
}
3591
3592
static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3593
{
3594
int rc = X86EMUL_CONTINUE;
3595
3596
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3597
rc = jmp_rel(ctxt, ctxt->src.val);
3598
3599
return rc;
3600
}
3601
3602
static int em_in(struct x86_emulate_ctxt *ctxt)
3603
{
3604
if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3605
&ctxt->dst.val))
3606
return X86EMUL_IO_NEEDED;
3607
3608
return X86EMUL_CONTINUE;
3609
}
3610
3611
static int em_out(struct x86_emulate_ctxt *ctxt)
3612
{
3613
ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3614
&ctxt->src.val, 1);
3615
/* Disable writeback. */
3616
ctxt->dst.type = OP_NONE;
3617
return X86EMUL_CONTINUE;
3618
}
3619
3620
static int em_cli(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->eflags &= ~X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_sti(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
	ctxt->eflags |= X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}
3638
3639
static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3640
{
3641
u32 eax, ebx, ecx, edx;
3642
u64 msr = 0;
3643
3644
ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3645
if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3646
ctxt->ops->cpl(ctxt)) {
3647
return emulate_gp(ctxt, 0);
3648
}
3649
3650
eax = reg_read(ctxt, VCPU_REGS_RAX);
3651
ecx = reg_read(ctxt, VCPU_REGS_RCX);
3652
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3653
*reg_write(ctxt, VCPU_REGS_RAX) = eax;
3654
*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3655
*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3656
*reg_write(ctxt, VCPU_REGS_RDX) = edx;
3657
return X86EMUL_CONTINUE;
3658
}
3659
3660
static int em_sahf(struct x86_emulate_ctxt *ctxt)
3661
{
3662
u32 flags;
3663
3664
flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3665
X86_EFLAGS_SF;
3666
flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3667
3668
ctxt->eflags &= ~0xffUL;
3669
ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3670
return X86EMUL_CONTINUE;
3671
}
3672
3673
static int em_lahf(struct x86_emulate_ctxt *ctxt)
3674
{
3675
*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3676
*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3677
return X86EMUL_CONTINUE;
3678
}
3679
3680
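/*
 * BSWAP: byte-swap the destination register.  The 64-bit form is only
 * available under CONFIG_X86_64; a 16-bit operand size, whose result is
 * architecturally undefined, simply takes the 32-bit path here.
 */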
static int em_bswap(struct x86_emulate_ctxt *ctxt)
3681
{
3682
switch (ctxt->op_bytes) {
3683
#ifdef CONFIG_X86_64
3684
case 8:
3685
asm("bswap %0" : "+r"(ctxt->dst.val));
3686
break;
3687
#endif
3688
default:
3689
asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3690
break;
3691
}
3692
return X86EMUL_CONTINUE;
3693
}
3694
3695
static int em_clflush(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflush regardless of cpuid */
	return X86EMUL_CONTINUE;
}

static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflushopt regardless of cpuid */
	return X86EMUL_CONTINUE;
}

static int em_movsxd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = (s32) ctxt->src.val;
	return X86EMUL_CONTINUE;
}
3712
3713
static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3714
{
3715
if (!ctxt->ops->guest_has_fxsr(ctxt))
3716
return emulate_ud(ctxt);
3717
3718
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3719
return emulate_nm(ctxt);
3720
3721
/*
3722
* Don't emulate a case that should never be hit, instead of working
3723
* around a lack of fxsave64/fxrstor64 on old compilers.
3724
*/
3725
if (ctxt->mode >= X86EMUL_MODE_PROT64)
3726
return X86EMUL_UNHANDLEABLE;
3727
3728
return X86EMUL_CONTINUE;
3729
}
3730
3731
/*
3732
* Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3733
* and restore MXCSR.
3734
*/
3735
static size_t __fxstate_size(int nregs)
3736
{
3737
return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3738
}
3739
3740
static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3741
{
3742
bool cr4_osfxsr;
3743
if (ctxt->mode == X86EMUL_MODE_PROT64)
3744
return __fxstate_size(16);
3745
3746
cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3747
return __fxstate_size(cr4_osfxsr ? 8 : 0);
3748
}
3749
3750
/*
3751
* FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3752
* 1) 16 bit mode
3753
* 2) 32 bit mode
3754
* - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
3755
* preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3756
* save and restore
3757
* 3) 64-bit mode with REX.W prefix
3758
* - like (2), but XMM 8-15 are being saved and restored
3759
* 4) 64-bit mode without REX.W prefix
3760
* - like (3), but FIP and FDP are 64 bit
3761
*
3762
* Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3763
* desired result. (4) is not emulated.
3764
*
3765
* Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3766
* and FPU DS) should match.
3767
*/
3768
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3769
{
3770
struct fxregs_state fx_state;
3771
int rc;
3772
3773
rc = check_fxsr(ctxt);
3774
if (rc != X86EMUL_CONTINUE)
3775
return rc;
3776
3777
kvm_fpu_get();
3778
3779
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3780
3781
kvm_fpu_put();
3782
3783
if (rc != X86EMUL_CONTINUE)
3784
return rc;
3785
3786
return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3787
fxstate_size(ctxt));
3788
}
3789
3790
/*
3791
* FXRSTOR might restore XMM registers not provided by the guest. Fill
3792
* in the host registers (via FXSAVE) instead, so they won't be modified.
3793
* (preemption has to stay disabled until FXRSTOR).
3794
*
3795
* Use noinline to keep the stack for other functions called by callers small.
3796
*/
3797
static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3798
const size_t used_size)
3799
{
3800
struct fxregs_state fx_tmp;
3801
int rc;
3802
3803
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3804
memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3805
__fxstate_size(16) - used_size);
3806
3807
return rc;
3808
}
3809
3810
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3811
{
3812
struct fxregs_state fx_state;
3813
int rc;
3814
size_t size;
3815
3816
rc = check_fxsr(ctxt);
3817
if (rc != X86EMUL_CONTINUE)
3818
return rc;
3819
3820
size = fxstate_size(ctxt);
3821
rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3822
if (rc != X86EMUL_CONTINUE)
3823
return rc;
3824
3825
kvm_fpu_get();
3826
3827
if (size < __fxstate_size(16)) {
3828
rc = fxregs_fixup(&fx_state, size);
3829
if (rc != X86EMUL_CONTINUE)
3830
goto out;
3831
}
3832
3833
if (fx_state.mxcsr >> 16) {
3834
rc = emulate_gp(ctxt, 0);
3835
goto out;
3836
}
3837
3838
if (rc == X86EMUL_CONTINUE)
3839
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3840
3841
out:
3842
kvm_fpu_put();
3843
3844
return rc;
3845
}
3846
3847
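/*
 * XSETBV: write EDX:EAX into the extended control register selected by
 * ECX.  #UD if CR4.OSXSAVE is clear; invalid XCR indices or values are
 * rejected with #GP by the ->set_xcr() callback.
 */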
static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3848
{
3849
u32 eax, ecx, edx;
3850
3851
if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3852
return emulate_ud(ctxt);
3853
3854
eax = reg_read(ctxt, VCPU_REGS_RAX);
3855
edx = reg_read(ctxt, VCPU_REGS_RDX);
3856
ecx = reg_read(ctxt, VCPU_REGS_RCX);
3857
3858
if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3859
return emulate_gp(ctxt, 0);
3860
3861
return X86EMUL_CONTINUE;
3862
}
3863
3864
static bool valid_cr(int nr)
{
	switch (nr) {
	case 0:
	case 2 ... 4:
	case 8:
		return true;
	default:
		return false;
	}
}

static int check_cr_access(struct x86_emulate_ctxt *ctxt)
{
	if (!valid_cr(ctxt->modrm_reg))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}
3883
3884
static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3885
{
3886
int dr = ctxt->modrm_reg;
3887
u64 cr4;
3888
3889
if (dr > 7)
3890
return emulate_ud(ctxt);
3891
3892
cr4 = ctxt->ops->get_cr(ctxt, 4);
3893
if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3894
return emulate_ud(ctxt);
3895
3896
if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
3897
ulong dr6;
3898
3899
dr6 = ctxt->ops->get_dr(ctxt, 6);
3900
dr6 &= ~DR_TRAP_BITS;
3901
dr6 |= DR6_BD | DR6_ACTIVE_LOW;
3902
ctxt->ops->set_dr(ctxt, 6, dr6);
3903
return emulate_db(ctxt);
3904
}
3905
3906
return X86EMUL_CONTINUE;
3907
}
3908
3909
static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3910
{
3911
u64 new_val = ctxt->src.val64;
3912
int dr = ctxt->modrm_reg;
3913
3914
if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3915
return emulate_gp(ctxt, 0);
3916
3917
return check_dr_read(ctxt);
3918
}
3919
3920
static int check_svme(struct x86_emulate_ctxt *ctxt)
3921
{
3922
u64 efer = 0;
3923
3924
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3925
3926
if (!(efer & EFER_SVME))
3927
return emulate_ud(ctxt);
3928
3929
return X86EMUL_CONTINUE;
3930
}
3931
3932
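/*
 * VMRUN/VMLOAD/VMSAVE take a physical address in RAX; reject addresses
 * with any of bits 63:48 set before applying the common EFER.SVME check.
 */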
static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3933
{
3934
u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3935
3936
/* Valid physical address? */
3937
if (rax & 0xffff000000000000ULL)
3938
return emulate_gp(ctxt, 0);
3939
3940
return check_svme(ctxt);
3941
}
3942
3943
static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3944
{
3945
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3946
3947
if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3948
return emulate_gp(ctxt, 0);
3949
3950
return X86EMUL_CONTINUE;
3951
}
3952
3953
static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3954
{
3955
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3956
u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3957
3958
/*
3959
	 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
3960
* in Ring3 when CR4.PCE=0.
3961
*/
3962
if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3963
return X86EMUL_CONTINUE;
3964
3965
/*
3966
* If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
3967
* check however is unnecessary because CPL is always 0 outside
3968
* protected mode.
3969
*/
3970
if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3971
ctxt->ops->check_rdpmc_early(ctxt, rcx))
3972
return emulate_gp(ctxt, 0);
3973
3974
return X86EMUL_CONTINUE;
3975
}
3976
3977
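/*
 * IN/INS and OUT/OUTS: clamp the access width to 4 bytes and #GP(0) if the
 * I/O permission check for the target port fails.
 */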
static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3978
{
3979
ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3980
if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3981
return emulate_gp(ctxt, 0);
3982
3983
return X86EMUL_CONTINUE;
3984
}
3985
3986
static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3987
{
3988
ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3989
if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3990
return emulate_gp(ctxt, 0);
3991
3992
return X86EMUL_CONTINUE;
3993
}
3994
3995
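/*
 * Shorthand constructors for opcode-table entries: N is an unimplemented
 * entry, D/I/F build an entry from flags only, an execute callback or a
 * fastop callback, G/GD/ID/MD/E/GP add group, group-dual, instruction-dual,
 * mode-dual, escape and prefix indirections, and the DI/DIP/II/IIP variants
 * additionally attach an intercept and, for the *P forms, a check_perm hook.
 */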
#define D(_y) { .flags = (_y) }
3996
#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3997
#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3998
.intercept = x86_intercept_##_i, .check_perm = (_p) }
3999
#define N D(NotImpl)
4000
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4001
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4002
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4003
#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4004
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4005
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4006
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4007
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4008
#define II(_f, _e, _i) \
4009
{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4010
#define IIP(_f, _e, _i, _p) \
4011
{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4012
.intercept = x86_intercept_##_i, .check_perm = (_p) }
4013
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4014
4015
#define D2bv(_f) D((_f) | ByteOp), D(_f)
4016
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4017
#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4018
#define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4019
#define I2bvIP(_f, _e, _i, _p) \
4020
IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4021
4022
#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4023
F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4024
F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
4025
4026
static const struct opcode group7_rm0[] = {
4027
N,
4028
I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4029
N, N, N, N, N, N,
4030
};
4031
4032
static const struct opcode group7_rm1[] = {
4033
DI(SrcNone | Priv, monitor),
4034
DI(SrcNone | Priv, mwait),
4035
N, N, N, N, N, N,
4036
};
4037
4038
static const struct opcode group7_rm2[] = {
4039
N,
4040
II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4041
N, N, N, N, N, N,
4042
};
4043
4044
static const struct opcode group7_rm3[] = {
4045
DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4046
II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4047
DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4048
DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4049
DIP(SrcNone | Prot | Priv, stgi, check_svme),
4050
DIP(SrcNone | Prot | Priv, clgi, check_svme),
4051
DIP(SrcNone | Prot | Priv, skinit, check_svme),
4052
DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4053
};
4054
4055
static const struct opcode group7_rm7[] = {
4056
N,
4057
DIP(SrcNone, rdtscp, check_rdtsc),
4058
N, N, N, N, N, N,
4059
};
4060
4061
static const struct opcode group1[] = {
4062
F(Lock, em_add),
4063
F(Lock | PageTable, em_or),
4064
F(Lock, em_adc),
4065
F(Lock, em_sbb),
4066
F(Lock | PageTable, em_and),
4067
F(Lock, em_sub),
4068
F(Lock, em_xor),
4069
F(NoWrite, em_cmp),
4070
};
4071
4072
static const struct opcode group1A[] = {
4073
I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4074
};
4075
4076
static const struct opcode group2[] = {
4077
F(DstMem | ModRM, em_rol),
4078
F(DstMem | ModRM, em_ror),
4079
F(DstMem | ModRM, em_rcl),
4080
F(DstMem | ModRM, em_rcr),
4081
F(DstMem | ModRM, em_shl),
4082
F(DstMem | ModRM, em_shr),
4083
F(DstMem | ModRM, em_shl),
4084
F(DstMem | ModRM, em_sar),
4085
};
4086
4087
static const struct opcode group3[] = {
4088
F(DstMem | SrcImm | NoWrite, em_test),
4089
F(DstMem | SrcImm | NoWrite, em_test),
4090
F(DstMem | SrcNone | Lock, em_not),
4091
F(DstMem | SrcNone | Lock, em_neg),
4092
F(DstXacc | Src2Mem, em_mul_ex),
4093
F(DstXacc | Src2Mem, em_imul_ex),
4094
F(DstXacc | Src2Mem, em_div_ex),
4095
F(DstXacc | Src2Mem, em_idiv_ex),
4096
};
4097
4098
static const struct opcode group4[] = {
4099
F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4100
F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4101
N, N, N, N, N, N,
4102
};
4103
4104
static const struct opcode group5[] = {
4105
F(DstMem | SrcNone | Lock, em_inc),
4106
F(DstMem | SrcNone | Lock, em_dec),
4107
I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs),
4108
I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far),
4109
I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4110
I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4111
I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4112
};
4113
4114
static const struct opcode group6[] = {
4115
II(Prot | DstMem, em_sldt, sldt),
4116
II(Prot | DstMem, em_str, str),
4117
II(Prot | Priv | SrcMem16, em_lldt, lldt),
4118
II(Prot | Priv | SrcMem16, em_ltr, ltr),
4119
N, N, N, N,
4120
};
4121
4122
static const struct group_dual group7 = { {
4123
II(Mov | DstMem, em_sgdt, sgdt),
4124
II(Mov | DstMem, em_sidt, sidt),
4125
II(SrcMem | Priv, em_lgdt, lgdt),
4126
II(SrcMem | Priv, em_lidt, lidt),
4127
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4128
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4129
II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4130
}, {
4131
EXT(0, group7_rm0),
4132
EXT(0, group7_rm1),
4133
EXT(0, group7_rm2),
4134
EXT(0, group7_rm3),
4135
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4136
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4137
EXT(0, group7_rm7),
4138
} };
4139
4140
static const struct opcode group8[] = {
4141
N, N, N, N,
4142
F(DstMem | SrcImmByte | NoWrite, em_bt),
4143
F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4144
F(DstMem | SrcImmByte | Lock, em_btr),
4145
F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4146
};
4147
4148
/*
4149
* The "memory" destination is actually always a register, since we come
4150
* from the register case of group9.
4151
*/
4152
static const struct gprefix pfx_0f_c7_7 = {
4153
N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4154
};
4155
4156
4157
static const struct group_dual group9 = { {
4158
N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4159
}, {
4160
N, N, N, N, N, N, N,
4161
GP(0, &pfx_0f_c7_7),
4162
} };
4163
4164
static const struct opcode group11[] = {
4165
I(DstMem | SrcImm | Mov | PageTable, em_mov),
4166
X7(D(Undefined)),
4167
};
4168
4169
static const struct gprefix pfx_0f_ae_7 = {
4170
I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4171
};
4172
4173
static const struct group_dual group15 = { {
4174
I(ModRM | Aligned16, em_fxsave),
4175
I(ModRM | Aligned16, em_fxrstor),
4176
N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4177
}, {
4178
N, N, N, N, N, N, N, N,
4179
} };
4180
4181
static const struct gprefix pfx_0f_6f_0f_7f = {
4182
I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4183
};
4184
4185
static const struct instr_dual instr_dual_0f_2b = {
4186
I(0, em_mov), N
4187
};
4188
4189
static const struct gprefix pfx_0f_2b = {
4190
ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4191
};
4192
4193
static const struct gprefix pfx_0f_10_0f_11 = {
4194
I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4195
};
4196
4197
static const struct gprefix pfx_0f_28_0f_29 = {
4198
I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4199
};
4200
4201
static const struct gprefix pfx_0f_e7 = {
4202
N, I(Sse, em_mov), N, N,
4203
};
4204
4205
static const struct escape escape_d9 = { {
4206
N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4207
}, {
4208
/* 0xC0 - 0xC7 */
4209
N, N, N, N, N, N, N, N,
4210
/* 0xC8 - 0xCF */
4211
N, N, N, N, N, N, N, N,
4212
/* 0xD0 - 0xD7 */
4213
N, N, N, N, N, N, N, N,
4214
/* 0xD8 - 0xDF */
4215
N, N, N, N, N, N, N, N,
4216
/* 0xE0 - 0xE7 */
4217
N, N, N, N, N, N, N, N,
4218
/* 0xE8 - 0xEF */
4219
N, N, N, N, N, N, N, N,
4220
/* 0xF0 - 0xF7 */
4221
N, N, N, N, N, N, N, N,
4222
/* 0xF8 - 0xFF */
4223
N, N, N, N, N, N, N, N,
4224
} };
4225
4226
static const struct escape escape_db = { {
4227
N, N, N, N, N, N, N, N,
4228
}, {
4229
/* 0xC0 - 0xC7 */
4230
N, N, N, N, N, N, N, N,
4231
/* 0xC8 - 0xCF */
4232
N, N, N, N, N, N, N, N,
4233
/* 0xD0 - 0xD7 */
4234
N, N, N, N, N, N, N, N,
4235
/* 0xD8 - 0xDF */
4236
N, N, N, N, N, N, N, N,
4237
/* 0xE0 - 0xE7 */
4238
N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4239
/* 0xE8 - 0xEF */
4240
N, N, N, N, N, N, N, N,
4241
/* 0xF0 - 0xF7 */
4242
N, N, N, N, N, N, N, N,
4243
/* 0xF8 - 0xFF */
4244
N, N, N, N, N, N, N, N,
4245
} };
4246
4247
static const struct escape escape_dd = { {
4248
N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4249
}, {
4250
/* 0xC0 - 0xC7 */
4251
N, N, N, N, N, N, N, N,
4252
/* 0xC8 - 0xCF */
4253
N, N, N, N, N, N, N, N,
4254
/* 0xD0 - 0xD7 */
4255
N, N, N, N, N, N, N, N,
4256
/* 0xD8 - 0xDF */
4257
N, N, N, N, N, N, N, N,
4258
/* 0xE0 - 0xE7 */
4259
N, N, N, N, N, N, N, N,
4260
/* 0xE8 - 0xEF */
4261
N, N, N, N, N, N, N, N,
4262
/* 0xF0 - 0xF7 */
4263
N, N, N, N, N, N, N, N,
4264
/* 0xF8 - 0xFF */
4265
N, N, N, N, N, N, N, N,
4266
} };
4267
4268
static const struct instr_dual instr_dual_0f_c3 = {
4269
I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4270
};
4271
4272
static const struct mode_dual mode_dual_63 = {
4273
N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4274
};
4275
4276
static const struct instr_dual instr_dual_8d = {
4277
D(DstReg | SrcMem | ModRM | NoAccess), N
4278
};
4279
4280
static const struct opcode opcode_table[256] = {
4281
/* 0x00 - 0x07 */
4282
F6ALU(Lock, em_add),
4283
I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4284
I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4285
/* 0x08 - 0x0F */
4286
F6ALU(Lock | PageTable, em_or),
4287
I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4288
N,
4289
/* 0x10 - 0x17 */
4290
F6ALU(Lock, em_adc),
4291
I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4292
I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4293
/* 0x18 - 0x1F */
4294
F6ALU(Lock, em_sbb),
4295
I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4296
I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4297
/* 0x20 - 0x27 */
4298
F6ALU(Lock | PageTable, em_and), N, N,
4299
/* 0x28 - 0x2F */
4300
F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4301
/* 0x30 - 0x37 */
4302
F6ALU(Lock, em_xor), N, N,
4303
/* 0x38 - 0x3F */
4304
F6ALU(NoWrite, em_cmp), N, N,
4305
/* 0x40 - 0x4F */
4306
X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4307
/* 0x50 - 0x57 */
4308
X8(I(SrcReg | Stack, em_push)),
4309
/* 0x58 - 0x5F */
4310
X8(I(DstReg | Stack, em_pop)),
4311
/* 0x60 - 0x67 */
4312
I(ImplicitOps | Stack | No64, em_pusha),
4313
I(ImplicitOps | Stack | No64, em_popa),
4314
N, MD(ModRM, &mode_dual_63),
4315
N, N, N, N,
4316
/* 0x68 - 0x6F */
4317
I(SrcImm | Mov | Stack, em_push),
4318
I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4319
I(SrcImmByte | Mov | Stack, em_push),
4320
I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4321
I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4322
I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4323
/* 0x70 - 0x7F */
4324
X16(D(SrcImmByte | NearBranch | IsBranch)),
4325
/* 0x80 - 0x87 */
4326
G(ByteOp | DstMem | SrcImm, group1),
4327
G(DstMem | SrcImm, group1),
4328
G(ByteOp | DstMem | SrcImm | No64, group1),
4329
G(DstMem | SrcImmByte, group1),
4330
F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4331
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4332
/* 0x88 - 0x8F */
4333
I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4334
I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4335
I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4336
ID(0, &instr_dual_8d),
4337
I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4338
G(0, group1A),
4339
/* 0x90 - 0x97 */
4340
DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4341
/* 0x98 - 0x9F */
4342
D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4343
I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N,
4344
II(ImplicitOps | Stack, em_pushf, pushf),
4345
II(ImplicitOps | Stack, em_popf, popf),
4346
I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4347
/* 0xA0 - 0xA7 */
4348
I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4349
I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4350
I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4351
F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4352
/* 0xA8 - 0xAF */
4353
F2bv(DstAcc | SrcImm | NoWrite, em_test),
4354
I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4355
I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4356
F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4357
/* 0xB0 - 0xB7 */
4358
X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4359
/* 0xB8 - 0xBF */
4360
X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4361
/* 0xC0 - 0xC7 */
4362
G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4363
I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm),
4364
I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret),
4365
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4366
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4367
G(ByteOp, group11), G(0, group11),
4368
/* 0xC8 - 0xCF */
4369
I(Stack | SrcImmU16 | Src2ImmByte, em_enter),
4370
I(Stack, em_leave),
4371
I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
4372
I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
4373
D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
4374
D(ImplicitOps | No64 | IsBranch),
4375
II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
4376
/* 0xD0 - 0xD7 */
4377
G(Src2One | ByteOp, group2), G(Src2One, group2),
4378
G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4379
I(DstAcc | SrcImmUByte | No64, em_aam),
4380
I(DstAcc | SrcImmUByte | No64, em_aad),
4381
F(DstAcc | ByteOp | No64, em_salc),
4382
I(DstAcc | SrcXLat | ByteOp, em_mov),
4383
/* 0xD8 - 0xDF */
4384
N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4385
/* 0xE0 - 0xE7 */
4386
X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4387
I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4388
I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4389
I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4390
/* 0xE8 - 0xEF */
4391
I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
4392
D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4393
I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4394
D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4395
I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4396
I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4397
/* 0xF0 - 0xF7 */
4398
N, DI(ImplicitOps, icebp), N, N,
4399
DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4400
G(ByteOp, group3), G(0, group3),
4401
/* 0xF8 - 0xFF */
4402
D(ImplicitOps), D(ImplicitOps),
4403
I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4404
D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4405
};
4406
4407
static const struct opcode twobyte_table[256] = {
4408
/* 0x00 - 0x0F */
4409
G(0, group6), GD(0, &group7), N, N,
4410
N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall),
4411
II(ImplicitOps | Priv, em_clts, clts), N,
4412
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4413
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4414
/* 0x10 - 0x1F */
4415
GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4416
GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4417
N, N, N, N, N, N,
4418
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4419
D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4420
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4421
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4422
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4423
D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4424
/* 0x20 - 0x2F */
4425
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4426
DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4427
IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4428
check_cr_access),
4429
IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4430
check_dr_write),
4431
N, N, N, N,
4432
GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4433
GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4434
N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4435
N, N, N, N,
4436
/* 0x30 - 0x3F */
4437
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4438
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4439
II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4440
IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4441
I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter),
4442
I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
4443
N, N,
4444
N, N, N, N, N, N, N, N,
4445
/* 0x40 - 0x4F */
4446
X16(D(DstReg | SrcMem | ModRM)),
4447
/* 0x50 - 0x5F */
4448
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4449
/* 0x60 - 0x6F */
4450
N, N, N, N,
4451
N, N, N, N,
4452
N, N, N, N,
4453
N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4454
/* 0x70 - 0x7F */
4455
N, N, N, N,
4456
N, N, N, N,
4457
N, N, N, N,
4458
N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4459
/* 0x80 - 0x8F */
4460
X16(D(SrcImm | NearBranch | IsBranch)),
4461
/* 0x90 - 0x9F */
4462
X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
4463
/* 0xA0 - 0xA7 */
4464
I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4465
II(ImplicitOps, em_cpuid, cpuid),
4466
F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4467
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4468
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4469
/* 0xA8 - 0xAF */
4470
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4471
II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4472
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4473
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4474
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4475
GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4476
/* 0xB0 - 0xB7 */
4477
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4478
I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4479
F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4480
I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4481
I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4482
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4483
/* 0xB8 - 0xBF */
4484
N, N,
4485
G(BitOp, group8),
4486
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4487
I(DstReg | SrcMem | ModRM, em_bsf_c),
4488
I(DstReg | SrcMem | ModRM, em_bsr_c),
4489
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4490
/* 0xC0 - 0xC7 */
4491
F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4492
N, ID(0, &instr_dual_0f_c3),
4493
N, N, N, GD(0, &group9),
4494
/* 0xC8 - 0xCF */
4495
X8(I(DstReg, em_bswap)),
4496
/* 0xD0 - 0xDF */
4497
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4498
/* 0xE0 - 0xEF */
4499
N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4500
N, N, N, N, N, N, N, N,
4501
/* 0xF0 - 0xFF */
4502
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4503
};
4504
4505
static const struct instr_dual instr_dual_0f_38_f0 = {
4506
I(DstReg | SrcMem | Mov, em_movbe), N
4507
};
4508
4509
static const struct instr_dual instr_dual_0f_38_f1 = {
4510
I(DstMem | SrcReg | Mov, em_movbe), N
4511
};
4512
4513
static const struct gprefix three_byte_0f_38_f0 = {
4514
ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N
4515
};
4516
4517
static const struct gprefix three_byte_0f_38_f1 = {
4518
ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N
4519
};
4520
4521
/*
4522
* Insns below are selected by the prefix, which is indexed by the third
* opcode byte.
*/
4525
static const struct opcode opcode_map_0f_38[256] = {
4526
/* 0x00 - 0x7f */
4527
X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4528
/* 0x80 - 0xef */
4529
X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4530
/* 0xf0 - 0xf1 */
4531
GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4532
GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4533
/* 0xf2 - 0xff */
4534
N, N, X4(N), X8(N)
4535
};
4536
4537
#undef D
4538
#undef N
4539
#undef G
4540
#undef GD
4541
#undef I
4542
#undef GP
4543
#undef EXT
4544
#undef MD
4545
#undef ID
4546
4547
#undef D2bv
4548
#undef D2bvIP
4549
#undef I2bv
4550
#undef I2bvIP
4551
#undef I6ALU
4552
4553
static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt)
4554
{
4555
return ctxt->d & ShadowStack;
4556
}
4557
4558
static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
4559
{
4560
u64 flags = ctxt->d;
4561
4562
if (!(flags & IsBranch))
4563
return false;
4564
4565
/*
4566
* All far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are
4567
* indirect and thus affect IBT state. All far RETs (including SYSEXIT
4568
* and IRET) are protected via Shadow Stacks and thus don't affect IBT
4569
* state. IRET #GPs when returning to virtual-8086 and IBT or SHSTK is
4570
* enabled, but that should be handled by IRET emulation (in the very
4571
* unlikely scenario that KVM adds support for fully emulating IRET).
4572
*/
4573
if (!(flags & NearBranch))
4574
return ctxt->execute != em_iret &&
4575
ctxt->execute != em_ret_far &&
4576
ctxt->execute != em_ret_far_imm &&
4577
ctxt->execute != em_sysexit;
4578
4579
switch (flags & SrcMask) {
4580
case SrcReg:
4581
case SrcMem:
4582
case SrcMem16:
4583
case SrcMem32:
4584
return true;
4585
case SrcMemFAddr:
4586
case SrcImmFAddr:
4587
/* Far branches should be handled above. */
4588
WARN_ON_ONCE(1);
4589
return true;
4590
case SrcNone:
4591
case SrcImm:
4592
case SrcImmByte:
4593
/*
4594
* Note, ImmU16 is used only for the stack adjustment operand on ENTER
4595
* and RET instructions. ENTER isn't a branch and RET FAR is handled
4596
* by the NearBranch check above. RET itself isn't an indirect branch.
4597
*/
4598
case SrcImmU16:
4599
return false;
4600
default:
4601
WARN_ONCE(1, "Unexpected Src operand '%llx' on branch",
4602
flags & SrcMask);
4603
return false;
4604
}
4605
}
4606
4607
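/*
 * Immediate size for OpImm/OpImmU: 1 byte for ByteOp, otherwise the operand
 * size clamped to 4 bytes.
 */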
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4608
{
4609
unsigned size;
4610
4611
size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4612
if (size == 8)
4613
size = 4;
4614
return size;
4615
}
4616
4617
static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4618
unsigned size, bool sign_extension)
4619
{
4620
int rc = X86EMUL_CONTINUE;
4621
4622
op->type = OP_IMM;
4623
op->bytes = size;
4624
op->addr.mem.ea = ctxt->_eip;
4625
/* NB. Immediates are sign-extended as necessary. */
4626
switch (op->bytes) {
4627
case 1:
4628
op->val = insn_fetch(s8, ctxt);
4629
break;
4630
case 2:
4631
op->val = insn_fetch(s16, ctxt);
4632
break;
4633
case 4:
4634
op->val = insn_fetch(s32, ctxt);
4635
break;
4636
case 8:
4637
op->val = insn_fetch(s64, ctxt);
4638
break;
4639
}
4640
if (!sign_extension) {
4641
switch (op->bytes) {
4642
case 1:
4643
op->val &= 0xff;
4644
break;
4645
case 2:
4646
op->val &= 0xffff;
4647
break;
4648
case 4:
4649
op->val &= 0xffffffff;
4650
break;
4651
}
4652
}
4653
done:
4654
return rc;
4655
}
4656
4657
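/* Decode a single operand according to the OpXxx type extracted from ctxt->d. */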
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4658
unsigned d)
4659
{
4660
int rc = X86EMUL_CONTINUE;
4661
4662
switch (d) {
4663
case OpReg:
4664
decode_register_operand(ctxt, op);
4665
break;
4666
case OpImmUByte:
4667
rc = decode_imm(ctxt, op, 1, false);
4668
break;
4669
case OpMem:
4670
ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4671
mem_common:
4672
*op = ctxt->memop;
4673
ctxt->memopp = op;
4674
if (ctxt->d & BitOp)
4675
fetch_bit_operand(ctxt);
4676
op->orig_val = op->val;
4677
break;
4678
case OpMem64:
4679
ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4680
goto mem_common;
4681
case OpAcc:
4682
op->type = OP_REG;
4683
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4684
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4685
fetch_register_operand(op);
4686
op->orig_val = op->val;
4687
break;
4688
case OpAccLo:
4689
op->type = OP_REG;
4690
op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4691
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4692
fetch_register_operand(op);
4693
op->orig_val = op->val;
4694
break;
4695
case OpAccHi:
4696
if (ctxt->d & ByteOp) {
4697
op->type = OP_NONE;
4698
break;
4699
}
4700
op->type = OP_REG;
4701
op->bytes = ctxt->op_bytes;
4702
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4703
fetch_register_operand(op);
4704
op->orig_val = op->val;
4705
break;
4706
case OpDI:
4707
op->type = OP_MEM;
4708
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4709
op->addr.mem.ea =
4710
register_address(ctxt, VCPU_REGS_RDI);
4711
op->addr.mem.seg = VCPU_SREG_ES;
4712
op->val = 0;
4713
op->count = 1;
4714
break;
4715
case OpDX:
4716
op->type = OP_REG;
4717
op->bytes = 2;
4718
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4719
fetch_register_operand(op);
4720
break;
4721
case OpCL:
4722
op->type = OP_IMM;
4723
op->bytes = 1;
4724
op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4725
break;
4726
case OpImmByte:
4727
rc = decode_imm(ctxt, op, 1, true);
4728
break;
4729
case OpOne:
4730
op->type = OP_IMM;
4731
op->bytes = 1;
4732
op->val = 1;
4733
break;
4734
case OpImm:
4735
rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4736
break;
4737
case OpImm64:
4738
rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4739
break;
4740
case OpMem8:
4741
ctxt->memop.bytes = 1;
4742
if (ctxt->memop.type == OP_REG) {
4743
ctxt->memop.addr.reg = decode_register(ctxt,
4744
ctxt->modrm_rm, true);
4745
fetch_register_operand(&ctxt->memop);
4746
}
4747
goto mem_common;
4748
case OpMem16:
4749
ctxt->memop.bytes = 2;
4750
goto mem_common;
4751
case OpMem32:
4752
ctxt->memop.bytes = 4;
4753
goto mem_common;
4754
case OpImmU16:
4755
rc = decode_imm(ctxt, op, 2, false);
4756
break;
4757
case OpImmU:
4758
rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4759
break;
4760
case OpSI:
4761
op->type = OP_MEM;
4762
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4763
op->addr.mem.ea =
4764
register_address(ctxt, VCPU_REGS_RSI);
4765
op->addr.mem.seg = ctxt->seg_override;
4766
op->val = 0;
4767
op->count = 1;
4768
break;
4769
case OpXLat:
4770
op->type = OP_MEM;
4771
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4772
op->addr.mem.ea =
4773
address_mask(ctxt,
4774
reg_read(ctxt, VCPU_REGS_RBX) +
4775
(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4776
op->addr.mem.seg = ctxt->seg_override;
4777
op->val = 0;
4778
break;
4779
case OpImmFAddr:
4780
op->type = OP_IMM;
4781
op->addr.mem.ea = ctxt->_eip;
4782
op->bytes = ctxt->op_bytes + 2;
4783
insn_fetch_arr(op->valptr, op->bytes, ctxt);
4784
break;
4785
case OpMemFAddr:
4786
ctxt->memop.bytes = ctxt->op_bytes + 2;
4787
goto mem_common;
4788
case OpES:
4789
op->type = OP_IMM;
4790
op->val = VCPU_SREG_ES;
4791
break;
4792
case OpCS:
4793
op->type = OP_IMM;
4794
op->val = VCPU_SREG_CS;
4795
break;
4796
case OpSS:
4797
op->type = OP_IMM;
4798
op->val = VCPU_SREG_SS;
4799
break;
4800
case OpDS:
4801
op->type = OP_IMM;
4802
op->val = VCPU_SREG_DS;
4803
break;
4804
case OpFS:
4805
op->type = OP_IMM;
4806
op->val = VCPU_SREG_FS;
4807
break;
4808
case OpGS:
4809
op->type = OP_IMM;
4810
op->val = VCPU_SREG_GS;
4811
break;
4812
case OpImplicit:
4813
/* Special instructions do their own operand decoding. */
4814
default:
4815
op->type = OP_NONE; /* Disable writeback. */
4816
break;
4817
}
4818
4819
done:
4820
return rc;
4821
}
4822
4823
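/*
 * Decode one instruction: legacy/REX prefixes, opcode byte(s) and table
 * lookup, ModRM/SIB and displacement, then the source, second source and
 * destination operands. Nothing is executed here.
 */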
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
4824
{
4825
int rc = X86EMUL_CONTINUE;
4826
int mode = ctxt->mode;
4827
int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4828
bool op_prefix = false;
4829
bool has_seg_override = false;
4830
struct opcode opcode;
4831
u16 dummy;
4832
struct desc_struct desc;
4833
4834
ctxt->memop.type = OP_NONE;
4835
ctxt->memopp = NULL;
4836
ctxt->_eip = ctxt->eip;
4837
ctxt->fetch.ptr = ctxt->fetch.data;
4838
ctxt->fetch.end = ctxt->fetch.data + insn_len;
4839
ctxt->opcode_len = 1;
4840
ctxt->intercept = x86_intercept_none;
4841
if (insn_len > 0)
4842
memcpy(ctxt->fetch.data, insn, insn_len);
4843
else {
4844
rc = __do_insn_fetch_bytes(ctxt, 1);
4845
if (rc != X86EMUL_CONTINUE)
4846
goto done;
4847
}
4848
4849
switch (mode) {
4850
case X86EMUL_MODE_REAL:
4851
case X86EMUL_MODE_VM86:
4852
def_op_bytes = def_ad_bytes = 2;
4853
ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
4854
if (desc.d)
4855
def_op_bytes = def_ad_bytes = 4;
4856
break;
4857
case X86EMUL_MODE_PROT16:
4858
def_op_bytes = def_ad_bytes = 2;
4859
break;
4860
case X86EMUL_MODE_PROT32:
4861
def_op_bytes = def_ad_bytes = 4;
4862
break;
4863
#ifdef CONFIG_X86_64
4864
case X86EMUL_MODE_PROT64:
4865
def_op_bytes = 4;
4866
def_ad_bytes = 8;
4867
break;
4868
#endif
4869
default:
4870
return EMULATION_FAILED;
4871
}
4872
4873
ctxt->op_bytes = def_op_bytes;
4874
ctxt->ad_bytes = def_ad_bytes;
4875
4876
/* Legacy prefixes. */
4877
for (;;) {
4878
switch (ctxt->b = insn_fetch(u8, ctxt)) {
4879
case 0x66: /* operand-size override */
4880
op_prefix = true;
4881
/* switch between 2/4 bytes */
4882
ctxt->op_bytes = def_op_bytes ^ 6;
4883
break;
4884
case 0x67: /* address-size override */
4885
if (mode == X86EMUL_MODE_PROT64)
4886
/* switch between 4/8 bytes */
4887
ctxt->ad_bytes = def_ad_bytes ^ 12;
4888
else
4889
/* switch between 2/4 bytes */
4890
ctxt->ad_bytes = def_ad_bytes ^ 6;
4891
break;
4892
case 0x26: /* ES override */
4893
has_seg_override = true;
4894
ctxt->seg_override = VCPU_SREG_ES;
4895
break;
4896
case 0x2e: /* CS override */
4897
has_seg_override = true;
4898
ctxt->seg_override = VCPU_SREG_CS;
4899
break;
4900
case 0x36: /* SS override */
4901
has_seg_override = true;
4902
ctxt->seg_override = VCPU_SREG_SS;
4903
break;
4904
case 0x3e: /* DS override */
4905
has_seg_override = true;
4906
ctxt->seg_override = VCPU_SREG_DS;
4907
break;
4908
case 0x64: /* FS override */
4909
has_seg_override = true;
4910
ctxt->seg_override = VCPU_SREG_FS;
4911
break;
4912
case 0x65: /* GS override */
4913
has_seg_override = true;
4914
ctxt->seg_override = VCPU_SREG_GS;
4915
break;
4916
case 0x40 ... 0x4f: /* REX */
4917
if (mode != X86EMUL_MODE_PROT64)
4918
goto done_prefixes;
4919
ctxt->rex_prefix = ctxt->b;
4920
continue;
4921
case 0xf0: /* LOCK */
4922
ctxt->lock_prefix = 1;
4923
break;
4924
case 0xf2: /* REPNE/REPNZ */
4925
case 0xf3: /* REP/REPE/REPZ */
4926
ctxt->rep_prefix = ctxt->b;
4927
break;
4928
default:
4929
goto done_prefixes;
4930
}
4931
4932
/* Any legacy prefix after a REX prefix nullifies its effect. */
4933
4934
ctxt->rex_prefix = 0;
4935
}
4936
4937
done_prefixes:
4938
4939
/* REX prefix. */
4940
if (ctxt->rex_prefix & 8)
4941
ctxt->op_bytes = 8; /* REX.W */
4942
4943
/* Opcode byte(s). */
4944
opcode = opcode_table[ctxt->b];
4945
/* Two-byte opcode? */
4946
if (ctxt->b == 0x0f) {
4947
ctxt->opcode_len = 2;
4948
ctxt->b = insn_fetch(u8, ctxt);
4949
opcode = twobyte_table[ctxt->b];
4950
4951
/* 0F_38 opcode map */
4952
if (ctxt->b == 0x38) {
4953
ctxt->opcode_len = 3;
4954
ctxt->b = insn_fetch(u8, ctxt);
4955
opcode = opcode_map_0f_38[ctxt->b];
4956
}
4957
}
4958
ctxt->d = opcode.flags;
4959
4960
if (ctxt->d & ModRM)
4961
ctxt->modrm = insn_fetch(u8, ctxt);
4962
4963
/* vex-prefix instructions are not implemented */
4964
if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
4965
(mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
4966
ctxt->d = NotImpl;
4967
}
4968
4969
while (ctxt->d & GroupMask) {
4970
switch (ctxt->d & GroupMask) {
4971
case Group:
4972
goffset = (ctxt->modrm >> 3) & 7;
4973
opcode = opcode.u.group[goffset];
4974
break;
4975
case GroupDual:
4976
goffset = (ctxt->modrm >> 3) & 7;
4977
if ((ctxt->modrm >> 6) == 3)
4978
opcode = opcode.u.gdual->mod3[goffset];
4979
else
4980
opcode = opcode.u.gdual->mod012[goffset];
4981
break;
4982
case RMExt:
4983
goffset = ctxt->modrm & 7;
4984
opcode = opcode.u.group[goffset];
4985
break;
4986
case Prefix:
4987
if (ctxt->rep_prefix && op_prefix)
4988
return EMULATION_FAILED;
4989
simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
4990
switch (simd_prefix) {
4991
case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
4992
case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
4993
case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
4994
case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4995
}
4996
break;
4997
case Escape:
4998
if (ctxt->modrm > 0xbf) {
4999
size_t size = ARRAY_SIZE(opcode.u.esc->high);
5000
u32 index = array_index_nospec(
5001
ctxt->modrm - 0xc0, size);
5002
5003
opcode = opcode.u.esc->high[index];
5004
} else {
5005
opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5006
}
5007
break;
5008
case InstrDual:
5009
if ((ctxt->modrm >> 6) == 3)
5010
opcode = opcode.u.idual->mod3;
5011
else
5012
opcode = opcode.u.idual->mod012;
5013
break;
5014
case ModeDual:
5015
if (ctxt->mode == X86EMUL_MODE_PROT64)
5016
opcode = opcode.u.mdual->mode64;
5017
else
5018
opcode = opcode.u.mdual->mode32;
5019
break;
5020
default:
5021
return EMULATION_FAILED;
5022
}
5023
5024
ctxt->d &= ~(u64)GroupMask;
5025
ctxt->d |= opcode.flags;
5026
}
5027
5028
ctxt->is_branch = opcode.flags & IsBranch;
5029
5030
/* Unrecognised? */
5031
if (ctxt->d == 0)
5032
return EMULATION_FAILED;
5033
5034
ctxt->execute = opcode.u.execute;
5035
5036
/*
5037
* Reject emulation if KVM might need to emulate shadow stack updates
5038
* and/or indirect branch tracking enforcement, which the emulator
5039
* doesn't support.
5040
*/
5041
if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) &&
5042
ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
5043
u64 u_cet = 0, s_cet = 0;
5044
5045
/*
5046
* Check both User and Supervisor on far transfers as inter-
5047
* privilege level transfers are impacted by CET at the target
5048
* privilege level, and that is not known at this time. The
5049
* expectation is that the guest will not require emulation of
5050
* any CET-affected instructions at any privilege level.
5051
*/
5052
if (!(ctxt->d & NearBranch))
5053
u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5054
else if (ctxt->ops->cpl(ctxt) == 3)
5055
u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5056
else
5057
s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5058
5059
if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
5060
(s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
5061
return EMULATION_FAILED;
5062
5063
if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt))
5064
return EMULATION_FAILED;
5065
5066
if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt))
5067
return EMULATION_FAILED;
5068
}
5069
5070
if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5071
likely(!(ctxt->d & EmulateOnUD)))
5072
return EMULATION_FAILED;
5073
5074
if (unlikely(ctxt->d &
5075
(NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5076
No16))) {
5077
/*
5078
* These are copied unconditionally here, and checked unconditionally
5079
* in x86_emulate_insn.
5080
*/
5081
ctxt->check_perm = opcode.check_perm;
5082
ctxt->intercept = opcode.intercept;
5083
5084
if (ctxt->d & NotImpl)
5085
return EMULATION_FAILED;
5086
5087
if (mode == X86EMUL_MODE_PROT64) {
5088
if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5089
ctxt->op_bytes = 8;
5090
else if (ctxt->d & NearBranch)
5091
ctxt->op_bytes = 8;
5092
}
5093
5094
if (ctxt->d & Op3264) {
5095
if (mode == X86EMUL_MODE_PROT64)
5096
ctxt->op_bytes = 8;
5097
else
5098
ctxt->op_bytes = 4;
5099
}
5100
5101
if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5102
ctxt->op_bytes = 4;
5103
5104
if (ctxt->d & Sse)
5105
ctxt->op_bytes = 16;
5106
else if (ctxt->d & Mmx)
5107
ctxt->op_bytes = 8;
5108
}
5109
5110
/* ModRM and SIB bytes. */
5111
if (ctxt->d & ModRM) {
5112
rc = decode_modrm(ctxt, &ctxt->memop);
5113
if (!has_seg_override) {
5114
has_seg_override = true;
5115
ctxt->seg_override = ctxt->modrm_seg;
5116
}
5117
} else if (ctxt->d & MemAbs)
5118
rc = decode_abs(ctxt, &ctxt->memop);
5119
if (rc != X86EMUL_CONTINUE)
5120
goto done;
5121
5122
if (!has_seg_override)
5123
ctxt->seg_override = VCPU_SREG_DS;
5124
5125
ctxt->memop.addr.mem.seg = ctxt->seg_override;
5126
5127
/*
5128
* Decode and fetch the source operand: register, memory
5129
* or immediate.
5130
*/
5131
rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5132
if (rc != X86EMUL_CONTINUE)
5133
goto done;
5134
5135
/*
5136
* Decode and fetch the second source operand: register, memory
5137
* or immediate.
5138
*/
5139
rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5140
if (rc != X86EMUL_CONTINUE)
5141
goto done;
5142
5143
/* Decode and fetch the destination operand: register or memory. */
5144
rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5145
5146
if (ctxt->rip_relative && likely(ctxt->memopp))
5147
ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5148
ctxt->memopp->addr.mem.ea + ctxt->_eip);
5149
5150
done:
5151
if (rc == X86EMUL_PROPAGATE_FAULT)
5152
ctxt->have_exception = true;
5153
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5154
}
5155
5156
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5157
{
5158
return ctxt->d & PageTable;
5159
}
5160
5161
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5162
{
5163
/*
 * The second termination condition applies only to REPE and REPNE.
 * If the repeat prefix is REPE/REPZ or REPNE/REPNZ, test the
 * corresponding termination condition:
 * - if REPE/REPZ and ZF = 0 then done
 * - if REPNE/REPNZ and ZF = 1 then done
 */
5170
if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5171
(ctxt->b == 0xae) || (ctxt->b == 0xaf))
5172
&& (((ctxt->rep_prefix == REPE_PREFIX) &&
5173
((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5174
|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
5175
((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5176
return true;
5177
5178
return false;
5179
}
5180
5181
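/*
 * Execute FWAIT with the guest FPU loaded so that a pending x87 exception is
 * surfaced now and converted into a guest #MF.
 */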
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5182
{
5183
int rc;
5184
5185
kvm_fpu_get();
5186
rc = asm_safe("fwait");
5187
kvm_fpu_put();
5188
5189
if (unlikely(rc != X86EMUL_CONTINUE))
5190
return emulate_exception(ctxt, MF_VECTOR, 0, false);
5191
5192
return X86EMUL_CONTINUE;
5193
}
5194
5195
static void fetch_possible_mmx_operand(struct operand *op)
5196
{
5197
if (op->type == OP_MM)
5198
kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5199
}
5200
5201
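/*
 * Dispatch to a fastop stub: the size variant is selected by offsetting fop,
 * dst/src/src2 are passed in RAX/RDX/RCX, guest RFLAGS are installed around
 * the CALL, and a NULL fop on return signals #DE.
 */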
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
5202
{
5203
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5204
5205
if (!(ctxt->d & ByteOp))
5206
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5207
5208
asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
5209
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5210
[thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
5211
: "c"(ctxt->src2.val));
5212
5213
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5214
if (!fop) /* exception is returned in fop variable */
5215
return emulate_de(ctxt);
5216
return X86EMUL_CONTINUE;
5217
}
5218
5219
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5220
{
5221
/* Clear fields that are set conditionally but read without a guard. */
5222
ctxt->rip_relative = false;
5223
ctxt->rex_prefix = 0;
5224
ctxt->lock_prefix = 0;
5225
ctxt->rep_prefix = 0;
5226
ctxt->regs_valid = 0;
5227
ctxt->regs_dirty = 0;
5228
5229
ctxt->io_read.pos = 0;
5230
ctxt->io_read.end = 0;
5231
ctxt->mem_read.end = 0;
5232
}
5233
5234
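/*
 * Execute a decoded instruction: run the mode, privilege, permission and
 * intercept checks, fetch memory operands, dispatch to ->execute()/fastop or
 * the inline opcode switch, then write back results and advance RIP.
 */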
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
5235
{
5236
const struct x86_emulate_ops *ops = ctxt->ops;
5237
int rc = X86EMUL_CONTINUE;
5238
int saved_dst_type = ctxt->dst.type;
5239
5240
ctxt->mem_read.pos = 0;
5241
5242
/* LOCK prefix is allowed only with some instructions */
5243
if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5244
rc = emulate_ud(ctxt);
5245
goto done;
5246
}
5247
5248
if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5249
rc = emulate_ud(ctxt);
5250
goto done;
5251
}
5252
5253
if (unlikely(ctxt->d &
5254
(No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5255
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5256
(ctxt->d & Undefined)) {
5257
rc = emulate_ud(ctxt);
5258
goto done;
5259
}
5260
5261
if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5262
|| ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5263
rc = emulate_ud(ctxt);
5264
goto done;
5265
}
5266
5267
if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5268
rc = emulate_nm(ctxt);
5269
goto done;
5270
}
5271
5272
if (ctxt->d & Mmx) {
5273
rc = flush_pending_x87_faults(ctxt);
5274
if (rc != X86EMUL_CONTINUE)
5275
goto done;
5276
/*
5277
* Now that we know the fpu is exception safe, we can fetch
5278
* operands from it.
5279
*/
5280
fetch_possible_mmx_operand(&ctxt->src);
5281
fetch_possible_mmx_operand(&ctxt->src2);
5282
if (!(ctxt->d & Mov))
5283
fetch_possible_mmx_operand(&ctxt->dst);
5284
}
5285
5286
if (unlikely(check_intercepts) && ctxt->intercept) {
5287
rc = emulator_check_intercept(ctxt, ctxt->intercept,
5288
X86_ICPT_PRE_EXCEPT);
5289
if (rc != X86EMUL_CONTINUE)
5290
goto done;
5291
}
5292
5293
/* Instruction can only be executed in protected mode */
5294
if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5295
rc = emulate_ud(ctxt);
5296
goto done;
5297
}
5298
5299
/* Privileged instruction can be executed only in CPL=0 */
5300
if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5301
if (ctxt->d & PrivUD)
5302
rc = emulate_ud(ctxt);
5303
else
5304
rc = emulate_gp(ctxt, 0);
5305
goto done;
5306
}
5307
5308
/* Do instruction specific permission checks */
5309
if (ctxt->d & CheckPerm) {
5310
rc = ctxt->check_perm(ctxt);
5311
if (rc != X86EMUL_CONTINUE)
5312
goto done;
5313
}
5314
5315
if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
5316
rc = emulator_check_intercept(ctxt, ctxt->intercept,
5317
X86_ICPT_POST_EXCEPT);
5318
if (rc != X86EMUL_CONTINUE)
5319
goto done;
5320
}
5321
5322
if (ctxt->rep_prefix && (ctxt->d & String)) {
5323
/* All REP prefixes have the same first termination condition */
5324
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5325
string_registers_quirk(ctxt);
5326
ctxt->eip = ctxt->_eip;
5327
ctxt->eflags &= ~X86_EFLAGS_RF;
5328
goto done;
5329
}
5330
}
5331
}
5332
5333
if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5334
rc = segmented_read(ctxt, ctxt->src.addr.mem,
5335
ctxt->src.valptr, ctxt->src.bytes);
5336
if (rc != X86EMUL_CONTINUE)
5337
goto done;
5338
ctxt->src.orig_val64 = ctxt->src.val64;
5339
}
5340
5341
if (ctxt->src2.type == OP_MEM) {
5342
rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5343
&ctxt->src2.val, ctxt->src2.bytes);
5344
if (rc != X86EMUL_CONTINUE)
5345
goto done;
5346
}
5347
5348
if ((ctxt->d & DstMask) == ImplicitOps)
5349
goto special_insn;
5350
5351
5352
if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5353
/* optimisation - avoid slow emulated read if Mov */
5354
rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5355
&ctxt->dst.val, ctxt->dst.bytes);
5356
if (rc != X86EMUL_CONTINUE) {
5357
if (!(ctxt->d & NoWrite) &&
5358
rc == X86EMUL_PROPAGATE_FAULT &&
5359
ctxt->exception.vector == PF_VECTOR)
5360
ctxt->exception.error_code |= PFERR_WRITE_MASK;
5361
goto done;
5362
}
5363
}
5364
/* Copy full 64-bit value for CMPXCHG8B. */
5365
ctxt->dst.orig_val64 = ctxt->dst.val64;
5366
5367
special_insn:
5368
5369
if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
5370
rc = emulator_check_intercept(ctxt, ctxt->intercept,
5371
X86_ICPT_POST_MEMACCESS);
5372
if (rc != X86EMUL_CONTINUE)
5373
goto done;
5374
}
5375
5376
if (ctxt->rep_prefix && (ctxt->d & String))
5377
ctxt->eflags |= X86_EFLAGS_RF;
5378
else
5379
ctxt->eflags &= ~X86_EFLAGS_RF;
5380
5381
if (ctxt->execute) {
5382
if (ctxt->d & Fastop)
5383
rc = fastop(ctxt, ctxt->fop);
5384
else
5385
rc = ctxt->execute(ctxt);
5386
if (rc != X86EMUL_CONTINUE)
5387
goto done;
5388
goto writeback;
5389
}
5390
5391
if (ctxt->opcode_len == 2)
5392
goto twobyte_insn;
5393
else if (ctxt->opcode_len == 3)
5394
goto threebyte_insn;
5395
5396
switch (ctxt->b) {
5397
case 0x70 ... 0x7f: /* jcc (short) */
5398
if (test_cc(ctxt->b, ctxt->eflags))
5399
rc = jmp_rel(ctxt, ctxt->src.val);
5400
break;
5401
case 0x8d: /* lea r16/r32, m */
5402
ctxt->dst.val = ctxt->src.addr.mem.ea;
5403
break;
5404
case 0x90 ... 0x97: /* nop / xchg reg, rax */
5405
if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5406
ctxt->dst.type = OP_NONE;
5407
else
5408
rc = em_xchg(ctxt);
5409
break;
5410
case 0x98: /* cbw/cwde/cdqe */
5411
switch (ctxt->op_bytes) {
5412
case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5413
case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5414
case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5415
}
5416
break;
5417
case 0xcc: /* int3 */
5418
rc = emulate_int(ctxt, 3);
5419
break;
5420
case 0xcd: /* int n */
5421
rc = emulate_int(ctxt, ctxt->src.val);
5422
break;
5423
case 0xce: /* into */
5424
if (ctxt->eflags & X86_EFLAGS_OF)
5425
rc = emulate_int(ctxt, 4);
5426
break;
5427
case 0xe9: /* jmp rel */
5428
case 0xeb: /* jmp rel short */
5429
rc = jmp_rel(ctxt, ctxt->src.val);
5430
ctxt->dst.type = OP_NONE; /* Disable writeback. */
5431
break;
5432
case 0xf4: /* hlt */
5433
ctxt->ops->halt(ctxt);
5434
break;
5435
case 0xf5: /* cmc */
5436
/* complement carry flag from eflags reg */
5437
ctxt->eflags ^= X86_EFLAGS_CF;
5438
break;
5439
case 0xf8: /* clc */
5440
ctxt->eflags &= ~X86_EFLAGS_CF;
5441
break;
5442
case 0xf9: /* stc */
5443
ctxt->eflags |= X86_EFLAGS_CF;
5444
break;
5445
case 0xfc: /* cld */
5446
ctxt->eflags &= ~X86_EFLAGS_DF;
5447
break;
5448
case 0xfd: /* std */
5449
ctxt->eflags |= X86_EFLAGS_DF;
5450
break;
5451
default:
5452
goto cannot_emulate;
5453
}
5454
5455
if (rc != X86EMUL_CONTINUE)
5456
goto done;
5457
5458
writeback:
5459
if (ctxt->d & SrcWrite) {
5460
BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5461
rc = writeback(ctxt, &ctxt->src);
5462
if (rc != X86EMUL_CONTINUE)
5463
goto done;
5464
}
5465
if (!(ctxt->d & NoWrite)) {
5466
rc = writeback(ctxt, &ctxt->dst);
5467
if (rc != X86EMUL_CONTINUE)
5468
goto done;
5469
}
5470
5471
/*
5472
* restore the dst type in case the decoding is reused
* (happens for string instructions)
*/
5475
ctxt->dst.type = saved_dst_type;
5476
5477
if ((ctxt->d & SrcMask) == SrcSI)
5478
string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5479
5480
if ((ctxt->d & DstMask) == DstDI)
5481
string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5482
5483
if (ctxt->rep_prefix && (ctxt->d & String)) {
5484
unsigned int count;
5485
struct read_cache *r = &ctxt->io_read;
5486
if ((ctxt->d & SrcMask) == SrcSI)
5487
count = ctxt->src.count;
5488
else
5489
count = ctxt->dst.count;
5490
register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5491
5492
if (!string_insn_completed(ctxt)) {
5493
/*
5494
* Re-enter the guest when the pio read-ahead buffer is empty
* or, if it is not used, after every 1024 iterations.
*/
5497
if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5498
(r->end == 0 || r->end != r->pos)) {
5499
/*
5500
* Reset the read cache. This usually happens before
* decode, but since the instruction is restarted
* we have to do it here.
*/
5504
ctxt->mem_read.end = 0;
5505
writeback_registers(ctxt);
5506
return EMULATION_RESTART;
5507
}
5508
goto done; /* skip rip writeback */
5509
}
5510
ctxt->eflags &= ~X86_EFLAGS_RF;
5511
}
5512
5513
ctxt->eip = ctxt->_eip;
5514
if (ctxt->mode != X86EMUL_MODE_PROT64)
5515
ctxt->eip = (u32)ctxt->_eip;
5516
5517
done:
5518
if (rc == X86EMUL_PROPAGATE_FAULT) {
5519
if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
5520
return EMULATION_FAILED;
5521
ctxt->have_exception = true;
5522
}
5523
if (rc == X86EMUL_INTERCEPTED)
5524
return EMULATION_INTERCEPTED;
5525
5526
if (rc == X86EMUL_CONTINUE)
5527
writeback_registers(ctxt);
5528
5529
return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
5530
5531
twobyte_insn:
5532
switch (ctxt->b) {
5533
case 0x09: /* wbinvd */
5534
(ctxt->ops->wbinvd)(ctxt);
5535
break;
5536
case 0x08: /* invd */
5537
case 0x0d: /* GrpP (prefetch) */
5538
case 0x18: /* Grp16 (prefetch/nop) */
5539
case 0x1f: /* nop */
5540
break;
5541
case 0x20: /* mov cr, reg */
5542
ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5543
break;
5544
case 0x21: /* mov from dr to reg */
5545
ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg);
5546
break;
5547
case 0x40 ... 0x4f: /* cmov */
5548
if (test_cc(ctxt->b, ctxt->eflags))
5549
ctxt->dst.val = ctxt->src.val;
5550
else if (ctxt->op_bytes != 4)
5551
ctxt->dst.type = OP_NONE; /* no writeback */
5552
break;
5553
case 0x80 ... 0x8f: /* jnz rel, etc*/
5554
if (test_cc(ctxt->b, ctxt->eflags))
5555
rc = jmp_rel(ctxt, ctxt->src.val);
5556
break;
5557
case 0x90 ... 0x9f: /* setcc r/m8 */
5558
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5559
break;
5560
case 0xb6 ... 0xb7: /* movzx */
5561
ctxt->dst.bytes = ctxt->op_bytes;
5562
ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5563
: (u16) ctxt->src.val;
5564
break;
5565
case 0xbe ... 0xbf: /* movsx */
5566
ctxt->dst.bytes = ctxt->op_bytes;
5567
ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5568
(s16) ctxt->src.val;
5569
break;
5570
default:
5571
goto cannot_emulate;
5572
}
5573
5574
threebyte_insn:
5575
5576
if (rc != X86EMUL_CONTINUE)
5577
goto done;
5578
5579
goto writeback;
5580
5581
cannot_emulate:
5582
return EMULATION_FAILED;
5583
}
5584
5585
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5586
{
5587
invalidate_registers(ctxt);
5588
}
5589
5590
void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5591
{
5592
writeback_registers(ctxt);
5593
}
5594
5595
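/*
 * A single cached GPA cannot be reused for REP string instructions or for
 * instructions with two memory operands.
 */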
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
5596
{
5597
if (ctxt->rep_prefix && (ctxt->d & String))
5598
return false;
5599
5600
if (ctxt->d & TwoMemOp)
5601
return false;
5602
5603
return true;
5604
}
5605
5606