GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/at.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <[email protected]>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
	wr->fst = fst;
	wr->ptw = s1ptw;
	wr->s2 = s1ptw;
	wr->failed = true;
}

#define S1_MMU_DISABLED	(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	if (wi->pa52bit)
		return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

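/*
 * Does this configuration use a 52bit output address? This depends on
 * the granule size (LPA for 64k via TCR_ELx.{PS,IPS}, LPA2 for 4k/16k
 * via TCR_ELx.DS) and on the features exposed in ID_AA64MMFR0_EL1.
 */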
static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
{
	switch (BIT(wi->pgshift)) {
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
			return false;
		return ((wi->regime == TR_EL2 ?
			 FIELD_GET(TCR_EL2_PS_MASK, tcr) :
			 FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
	case SZ_16K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
			return false;
		break;
	case SZ_4K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
			return false;
		break;
	}

	return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
}

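/* Extract the output address from a descriptor, depending on the PA size */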
static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
{
	u64 addr;

	if (!wi->pa52bit)
		return desc & GENMASK_ULL(47, wi->pgshift);

	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		addr = desc & GENMASK_ULL(49, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
		break;
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		addr = desc & GENMASK_ULL(47, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
		break;
	}

	return addr;
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
		break;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

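/*
 * TCR2_EL1 only takes effect for the EL1&0 regime when enabled by
 * HCRX_EL2.TCR2En; otherwise it behaves as all-zeroes.
 */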
static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (regime == TR_EL10) {
		if (vcpu_has_nv(vcpu) &&
		    !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
			return 0;

		return vcpu_read_sys_reg(vcpu, TCR2_EL1);
	}

	return vcpu_read_sys_reg(vcpu, TCR2_EL2);
}

static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
	return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	val = effective_tcr2(vcpu, wi->regime);

	/* Abuse TCR2_EL1_* for EL2 */
	wi->poe = val & TCR2_EL1_POE;
	wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}

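/*
 * Gather the translation controls for the relevant regime (SCTLR, TCR,
 * TTBR and friends), sanity-check them for configurations that lead to
 * an Address Size or Translation fault, and fill in the walk info.
 * Also deals with the S1 MMU being disabled.
 */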
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva;

	va55 = va & BIT(55);

	if (vcpu_has_nv(vcpu)) {
		hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
		wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
	} else {
		WARN_ON_ONCE(wi->regime != TR_EL10);
		wi->s2 = false;
		hcr = 0;
	}

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55 && wi->regime != TR_EL2) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);

	ia_bits = get_ia_size(wi);

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	wi->sh = (wi->regime == TR_EL2 ?
		  FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
		  (va55 ?
		   FIELD_GET(TCR_SH1_MASK, tcr) :
		   FIELD_GET(TCR_SH0_MASK, tcr)));

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		lva = wi->pa52bit;
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault;

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault;

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;
	if (wi->pa52bit) {
		/*
		 * Force the alignment on 64 bytes for top-level tables
		 * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
		 * store bits [51:48] of the first level of lookup.
		 */
		x = max(x, 6);

		wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
	}

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:
	/*
	 * Address Size Fault level 0 to indicate it comes from TTBR.
	 * yes, this is an oddity.
	 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
	return -EFAULT;

transfault:
	/* Translation Fault on start level */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
	return -EFAULT;
}

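/*
 * Perform the actual S1 page table walk, one level at a time, going
 * through S2 (and the optional walk filter) for each table access,
 * and fill in the walk result.
 */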
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		if (wi->filter) {
			ret = wi->filter->fn(&(struct s1_walk_context)
					     {
						     .wi = wi,
						     .table_ipa = baddr,
						     .level = level,
					     }, wi->filter->priv);
			if (ret)
				return ret;
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc_to_oa(wi, desc);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2 || (wi->pa52bit && level == 1);
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	baddr = desc_to_oa(wi, desc);
	if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
		goto addrsz;

	if (!(desc & PTE_AF)) {
		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
		return -EACCES;
	}

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = baddr & GENMASK(52, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
	if (wr->nG) {
		u64 asid_ttbr, tcr;

		switch (wi->regime) {
		case TR_EL10:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
			break;
		case TR_EL20:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
			break;
		default:
			BUG();
		}

		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			wr->asid &= GENMASK(7, 0);
	}

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
	return -ENOENT;
}

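/*
 * Host MMU context saved/restored around loading the guest's S1/S2
 * configuration for a hardware-assisted AT operation.
 */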
struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;
	u64	pir;
	u64	pire0;
	u64	por_el0;
	u64	por_el1;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
	config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
	config->tcr = read_sysreg_el1(SYS_TCR);
	config->mair = read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2 = read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir = read_sysreg_el1(SYS_PIR);
			config->pire0 = read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1 = read_sysreg_el1(SYS_POR);
			config->por_el0 = read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr = read_sysreg_el1(SYS_SCTLR);
	config->vttbr = read_sysreg(vttbr_el2);
	config->vtcr = read_sysreg(vtcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

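/* Issue AT S1E1RP/S1E1WP with PSTATE.PAN temporarily set to the guest's value */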
static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

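/* Convert a S2 MemAttr[3:0] encoding into a MAIR-style attribute */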
static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

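/* Combine S1 and S2 cacheability for Normal memory, one 4bit half at a time */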
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_final_sh(u8 attr, u8 sh)
{
	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
			u8 attr)
{
	u8 sh;

	/*
	 * non-52bit and LPA have their basic shareability described in the
	 * descriptor. LPA2 gets it from the corresponding field in TCR,
	 * conveniently recorded in the walk info.
	 */
	if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
		sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
	else
		sh = wi->sh;

	return compute_final_sh(attr, sh);
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

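/*
 * Merge the S1 PAR with the result of the S2 translation (attributes,
 * shareability and output address), honouring HCR_EL2.FWB and HCR_EL2.CD.
 */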
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr, s2_sh;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
							s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);

	par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_final_sh(final_attr, s2_sh)));

	return par;
}

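/*
 * Encode the result of a S1 walk into a PAR_EL1 value, covering the
 * faulting, MMU-disabled and regular cases.
 */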
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			  struct s1_walk_result *wr)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & SYS_PAR_EL1_PA;

		if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (wi->regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (wi->regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & SYS_PAR_EL1_PA;

		sh = compute_s1_sh(wi, wr, mair);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

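/* Base (direct) S1 permissions, from the AP/PXN/UXN bits of the descriptor */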
static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip) \
	do { \
		/* R_LLZDZ */ \
		switch ((ip)) { \
		case 0b0000: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b0001: \
			set_ ## w ## _perms((wr), true , false, false); \
			break; \
		case 0b0010: \
			set_ ## w ## _perms((wr), false, false, true ); \
			break; \
		case 0b0011: \
			set_ ## w ## _perms((wr), true , false, true ); \
			break; \
		case 0b0100: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b0101: \
			set_ ## w ## _perms((wr), true , true , false); \
			break; \
		case 0b0110: \
			set_ ## w ## _perms((wr), true , true , true ); \
			break; \
		case 0b0111: \
			set_ ## w ## _perms((wr), true , true , true ); \
			break; \
		case 0b1000: \
			set_ ## w ## _perms((wr), true , false, false); \
			break; \
		case 0b1001: \
			set_ ## w ## _perms((wr), true , false, false); \
			break; \
		case 0b1010: \
			set_ ## w ## _perms((wr), true , false, true ); \
			break; \
		case 0b1011: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b1100: \
			set_ ## w ## _perms((wr), true , true , false); \
			break; \
		case 0b1101: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b1110: \
			set_ ## w ## _perms((wr), true , true , true ); \
			break; \
		case 0b1111: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		} \
		\
		/* R_HJYGR */ \
		set_ ## w ## _wxn((wr), ((ip) == 0b0110)); \
		\
	} while (0)

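/* S1 indirect permissions (S1PIE), indexing into PIR_ELx/PIRE0_ELx */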
static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	if (wr->pov) {
		switch (wi->regime) {
		case TR_EL10:
			pov_perms = perm_idx(vcpu, POR_EL1, idx);
			break;
		case TR_EL20:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		case TR_EL2:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		}

		if (pov_perms & ~POE_RWX)
			pov_perms = POE_NONE;

		/* R_QXXPC, S1PrivOverlay enabled */
		if (wr->pwxn && (pov_perms & POE_X))
			pov_perms &= ~POE_W;

		wr->pr &= pov_perms & POE_R;
		wr->pw &= pov_perms & POE_W;
		wr->px &= pov_perms & POE_X;
	}

	if (wr->uov) {
		switch (wi->regime) {
		case TR_EL10:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL20:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL2:
			uov_perms = 0;
			break;
		}

		if (uov_perms & ~POE_RWX)
			uov_perms = POE_NONE;

		/* R_NPBXC, S1UnprivOverlay enabled */
		if (wr->uwxn && (uov_perms & POE_X))
			uov_perms &= ~POE_W;

		wr->ur &= uov_perms & POE_R;
		wr->uw &= uov_perms & POE_W;
		wr->ux &= uov_perms & POE_X;
	}
}

static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC, S1PrivOverlay disabled */
	if (!wr->pov)
		wr->px &= !(wr->pwxn && wr->pw);

	/* R_NPBXC, S1UnprivOverlay disabled */
	if (!wr->uov)
		wr->ux &= !(wr->uwxn && wr->uw);

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

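/*
 * Emulate the AT instruction in software: walk the guest's S1 tables,
 * compute the resulting permissions and turn the outcome into a
 * PAR_EL1 value.
 */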
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	wi.regime = compute_translation_regime(vcpu, op);
	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);

compute_par:
	return compute_par_s1(vcpu, &wi, &wr);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission or
 * access fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail, mmu_cs;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 *
	 * We are also guaranteed to be in the correct context if
	 * we're not in a nested VM.
	 */
	mmu_cs = (vcpu_has_nv(vcpu) &&
		  !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
	if (!mmu_cs)
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Temporarily switch back to guest context */
	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);

	if (mmu_cs)
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

static bool par_check_s1_access_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
		!(par & SYS_PAR_EL1_S));
}

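/*
 * Emulate AT S1E0x/S1E1x: try the HW-based fast path first, and fall
 * back to the SW walk unless PAR_EL1 reports a S1 permission or
 * access fault.
 */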
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission or access
	 * fault, we know for sure that the PTW was able to walk the S1
	 * tables and there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) &&
	    !par_check_s1_perm_fault(par) &&
	    !par_check_s1_access_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

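/*
 * Emulate AT S1E2x: run the corresponding EL1 AT on the guest's EL2
 * translation regime by temporarily adjusting HCR_EL2, and fall back
 * to the SW walk if that doesn't produce a usable result.
 */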
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg_hcr(val);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg_hcr(hcr);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

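/*
 * Emulate AT S12E0x/S12E1x: perform the S1 translation, then walk the
 * nested S2 tables and merge both results into PAR_EL1.
 */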
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (EL2&0), exit
	 * early. Same thing if {VM,DC}=={0,0}.
	 */
	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

/*
 * Translate a VA for a given EL in a given translation regime, with
 * or without PAN. This requires wi->{regime, as_el0, pan} to be
 * set. The rest of the wi and wr should be 0-initialised.
 */
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		       struct s1_walk_result *wr, u64 va)
{
	int ret;

	ret = setup_s1_walk(vcpu, wi, wr, va);
	if (ret)
		return ret;

	if (wr->level == S1_MMU_DISABLED) {
		wr->ur = wr->uw = wr->ux = true;
		wr->pr = wr->pw = wr->px = true;
	} else {
		ret = walk_s1(vcpu, wi, wr, va);
		if (ret)
			return ret;

		compute_s1_permissions(vcpu, wi, wr);
	}

	return 0;
}

struct desc_match {
	u64 ipa;
	int level;
};

static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
{
	struct desc_match *dm = priv;
	u64 ipa = dm->ipa;

	/* Use S1 granule alignment */
	ipa &= GENMASK(51, ctxt->wi->pgshift);

	/* Not the IPA we're looking for? Continue. */
	if (ipa != ctxt->table_ipa)
		return 0;

	/* Note the level and interrupt the walk */
	dm->level = ctxt->level;
	return -EINTR;
}

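/*
 * Find the level at which a given IPA is used as a S1 page table for
 * the translation of 'va', by walking the guest's S1 tables with a
 * filter matching on the table address.
 */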
int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
{
	struct desc_match dm = {
		.ipa = ipa,
	};
	struct s1_walk_info wi = {
		.filter = &(struct s1_walk_filter){
			.fn = match_s1_desc,
			.priv = &dm,
		},
		.regime = TR_EL10,
		.as_el0 = false,
		.pan = false,
	};
	struct s1_walk_result wr = {};
	int ret;

	ret = setup_s1_walk(vcpu, &wi, &wr, va);
	if (ret)
		return ret;

	/* We really expect the S1 MMU to be on here... */
	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
		*level = 0;
		return 0;
	}

	/* Walk the guest's PT, looking for a match along the way */
	ret = walk_s1(vcpu, &wi, &wr, va);
	switch (ret) {
	case -EINTR:
		/* We interrupted the walk on a match, return the level */
		*level = dm.level;
		return 0;
	case 0:
		/* The walk completed, we failed to find the entry */
		return -ENOENT;
	default:
		/* Any other error... */
		return ret;
	}
}