GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/pkvm.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <[email protected]>
 */

#include <linux/init.h>
#include <linux/interval_tree_generic.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

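/*
 * Copy the host's memblock regions into the hypervisor-owned array so that
 * the nVHE code gets its own view of memory. Fails with -ENOMEM if there are
 * more regions than the fixed-size hyp array can hold.
 */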
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}

	return 0;
}

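/*
 * Reserve, early at boot, the physical memory needed by the nVHE hypervisor:
 * hyp stage-1 and host stage-2 page-tables, the VM table, the hyp vmemmap,
 * selftest pages and the FF-A proxy pages. Only does anything when KVM runs
 * in protected nVHE mode.
 */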
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += pkvm_selftest_pages();
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

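/*
 * Tear down the hypervisor side of a VM: destroy a fully created hyp VM, or
 * unreserve a handle whose hyp initialization never completed, then reset the
 * host-side state and free the teardown memcaches.
 */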
static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
{
	if (pkvm_hyp_vm_is_created(kvm)) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  kvm->arch.pkvm.handle));
	} else if (kvm->arch.pkvm.handle) {
		/*
		 * The VM could have been reserved but hyp initialization has
		 * failed. Make sure to unreserve it.
		 */
		kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle);
	}

	kvm->arch.pkvm.handle = 0;
	kvm->arch.pkvm.is_created = false;
	free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc);
}

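/*
 * Allocate the hyp vCPU state and hand it to the hypervisor for
 * initialization. The vCPU is marked finalized on success; the pages are
 * freed again on failure.
 */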
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd);
	if (ret)
		goto free_vm;

	kvm->arch.pkvm.is_created = true;
	kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

bool pkvm_hyp_vm_is_created(struct kvm *kvm)
{
	return READ_ONCE(kvm->arch.pkvm.is_created);
}

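/*
 * Locked wrappers for the hyp VM/vCPU creation paths: serialized by the kvm
 * config_lock, and no-ops if the hyp counterpart already exists.
 */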
int pkvm_create_hyp_vm(struct kvm *kvm)
{
	int ret = 0;

	mutex_lock(&kvm->arch.config_lock);
	if (!pkvm_hyp_vm_is_created(kvm))
		ret = __pkvm_create_hyp_vm(kvm);
	mutex_unlock(&kvm->arch.config_lock);

	return ret;
}

int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

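/* Tear down the hyp VM, serialized by the kvm config_lock. */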
void pkvm_destroy_hyp_vm(struct kvm *kvm)
{
	mutex_lock(&kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(kvm);
	mutex_unlock(&kvm->arch.config_lock);
}

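/*
 * Reserve a handle for this VM in the hypervisor. Returns 0 if a handle is
 * already reserved, or -EINVAL if the hyp VM has already been created.
 */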
int pkvm_init_host_vm(struct kvm *kvm)
{
	int ret;

	if (pkvm_hyp_vm_is_created(kvm))
		return -EINVAL;

	/* VM is already reserved, no need to proceed. */
	if (kvm->arch.pkvm.handle)
		return 0;

	/* Reserve the VM in hyp and obtain a hyp handle for the VM. */
	ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm);
	if (ret < 0)
		return ret;

	kvm->arch.pkvm.handle = ret;

	return 0;
}

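/* Per-CPU callback: ask the hypervisor to finalize host stage-2 protection. */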
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

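/*
 * Finalize protected mode on every CPU, after which the hypervisor enforces
 * the host stage-2 protections.
 */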
static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

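/*
 * Late initcall that completes pKVM initialization: hide the hyp sections and
 * carveout from kmemleak (they become inaccessible to the host) and drop the
 * host's privileges.
 */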
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

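/*
 * Interval-tree bounds of a pkvm_mapping, in bytes of guest IPA space
 * (inclusive end).
 */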
static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
{
	return m->gfn * PAGE_SIZE;
}

static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
{
	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
}

INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
		     __pkvm_mapping_start, __pkvm_mapping_end, static,
		     pkvm_mapping);

/*
 * __tmp is updated to iter_first(pkvm_mappings) *before* entering the body of the loop to allow
 * freeing of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings,	\
								  __start, __end - 1);		\
	     __tmp && ({									\
			__map = __tmp;								\
			__tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);		\
			true;									\
		       });									\
	    )

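/*
 * For a protected guest the host doesn't walk stage-2 page-tables itself; it
 * only tracks which pages it has shared with the guest in an interval tree
 * keyed by IPA, so initialization amounts to an empty tree.
 */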
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings = RB_ROOT_CACHED;
	pgt->mmu = mmu;

	return 0;
}

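/*
 * Unshare from the guest, and forget, every tracked mapping overlapping
 * [start, end). Nothing to do if the VM has no hyp handle yet.
 */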
static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret;

	if (!handle)
		return 0;

	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			return ret;
		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return 0;
}

void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
}

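/*
 * Share a page or block of host memory with the guest at @addr. An existing
 * same-size mapping means we lost a race with another vCPU and -EAGAIN is
 * returned; differently sized overlaps are unmapped first. The tracking
 * structure itself comes pre-allocated from the vCPU's hyp memcache.
 */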
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE && size != PMD_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);

	/*
	 * Calling stage2_map() on top of existing mappings is either happening because of a race
	 * with another vCPU, or because we're changing between page and block mappings. As per
	 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
	 */
	mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
	if (mapping) {
		if (size == (mapping->nr_pages * PAGE_SIZE))
			return -EAGAIN;

		/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
		if (ret)
			return ret;
		mapping = NULL;
	}

	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
	if (WARN_ON(ret))
		return ret;

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	mapping->nr_pages = size / PAGE_SIZE;
	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);

	return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);

	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

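/*
 * The remaining stage-2 operations walk the tracked mappings and forward each
 * range to the hypervisor, except for flush which cleans the data cache
 * through the host's linear alias of the pages.
 */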
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
					  PAGE_SIZE * mapping->nr_pages);

	return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mapping->nr_pages, mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

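/*
 * Unlinked page-table management and block splitting are not supported for
 * protected guests; these stubs warn if they are ever reached.
 */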
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}